In [1]:
import torch 
from torch import nn

import ray
from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

#from models import VisualEncoder
from train import *
from wrappers_2 import *



In [2]:
class VisualEncoder(nn.Module):
    """Convolutional image encoder producing one flat feature vector per sample.

    The network is six ``Conv2d`` layers (kernel 2, stride 2, no padding), each
    followed by ``ELU``, with channel widths 3 -> 32 -> 32 -> 64 -> 128 -> 256 -> 512.
    Every convolution halves the spatial size, so a 64x64 input is reduced to
    1x1 and the flattened output has 512 features.
    """

    def __init__(self):
        super().__init__()

        # Channel count before the first convolution and after each one.
        widths = (3, 32, 32, 64, 128, 256, 512)

        stages = []
        for in_channels, out_channels in zip(widths[:-1], widths[1:]):
            stages.append(
                nn.Conv2d(in_channels, out_channels, kernel_size=2, stride=2, padding=0)
            )
            stages.append(nn.ELU())
        stages.append(nn.Flatten())

        # The attribute name and the layer order determine the state-dict keys
        # ("cnn.0.weight", "cnn.2.weight", ...), so both are kept as they were.
        self.cnn = nn.Sequential(*stages)

    def forward(self, x):
        """Apply the convolutional stack to ``x`` and return the flattened result."""
        features = self.cnn(x)
        return features

In [3]:
from torch.nn.functional import one_hot

class MyModelClass(TorchModelV2, nn.Module):
    """Actor-critic RLlib model over a camera image and a target block grid.

    The ``pov`` image is encoded by a pretrained ``VisualEncoder``; its output
    is computed under ``torch.no_grad()``, so no gradient from this model's
    losses reaches the encoder. The ``target_grid`` observation is one-hot
    encoded into 7 classes, reduced to a single channel by a 1x1x1 ``Conv3d``
    and flattened. Both feature vectors are concatenated and passed through an
    MLP trunk that feeds an action-logit head and a scalar value head.

    Args:
        obs_space, action_space, num_outputs, model_config, name: forwarded
            unchanged to ``TorchModelV2.__init__``. ``action_space.n`` sets the
            width of the action head.
        encoder_weights_path: file passed to ``torch.load`` to obtain the
            visual encoder's state dict. Defaults to the location that was
            previously hard-coded.
            NOTE(review): presumably this can be set through the trainer's
            "custom_model_config" entry, which the config comment describes
            as extra kwargs for the model constructor - confirm.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name,
                 encoder_weights_path="/IGLU-Minecraft/models/AngelaCNN/encoder_weigths.pth"):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)

        # Flattened VisualEncoder output for a 64x64 image (512 channels at 1x1).
        visual_features_dim = 512
        # One value per cell of the (9, 11, 11) target grid after the 1-channel conv.
        target_features_dim = 9 * 11 * 11

        self.visual_encoder = VisualEncoder()
        # Load on CPU first so the checkpoint can be read regardless of the
        # device it was saved from; the module is moved afterwards.
        self.visual_encoder.load_state_dict(
            torch.load(encoder_weights_path, map_location=torch.device('cpu'))
        )
        self.target_encoder = nn.Sequential(
            nn.Conv3d(7, 1, kernel_size=1, stride=1, padding=0),
            nn.ELU(),
        )

        policy_hidden_dim = 256
        self.policy_network = nn.Sequential(
            nn.Linear(visual_features_dim + target_features_dim, 1024),
            nn.ELU(),
            nn.Linear(1024, 512),
            nn.ELU(),
            nn.Linear(512, policy_hidden_dim),
            nn.ELU(),
            nn.Linear(policy_hidden_dim, policy_hidden_dim),
            nn.ELU(),
        )
        self.action_head = nn.Linear(policy_hidden_dim, action_space.n)
        self.value_head = nn.Linear(policy_hidden_dim, 1)
        # Value estimate of the most recent forward() call; read by value_function().
        self.last_value = None

        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            # Moves every registered sub-module (both encoders, trunk, both heads).
            self.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Compute action logits and cache the value estimate.

        Args:
            input_dict: must contain ``'obs'`` with the entries ``'pov'`` and
                ``'target_grid'``. ``pov`` is permuted from channels-last to
                channels-first and divided by 255; ``target_grid`` is
                converted to integer class indices for one-hot encoding.
            state: returned unchanged.
            seq_lens: unused.

        Returns:
            ``(logits, state)`` where ``logits`` is the action head's output.
        """
        obs = input_dict['obs']

        # ``Tensor.cuda()`` returns a new tensor instead of moving in place, so
        # the previous bare ``pov.cuda()`` / ``target.cuda()`` calls had no
        # effect. Inputs are moved to wherever the parameters currently live,
        # which also stays correct if the model is later moved to another device.
        device = next(self.parameters()).device
        pov = obs['pov'].to(device).permute(0, 3, 1, 2).float() / 255.0
        target = one_hot(obs['target_grid'].to(device).long(), num_classes=7)
        # Class axis goes from last position to the channel position for Conv3d.
        target = target.permute(0, 4, 1, 2, 3).float()

        # The pretrained encoder is used as a fixed feature extractor.
        with torch.no_grad():
            visual_features = self.visual_encoder(pov)

        target_features = self.target_encoder(target)
        target_features = target_features.reshape(target_features.shape[0], -1)

        features = torch.cat([visual_features, target_features], dim=1)
        features = self.policy_network(features)
        action = self.action_head(features)
        self.last_value = self.value_head(features).squeeze(1)
        return action, state

    @override(TorchModelV2)
    def value_function(self):
        """Return the value estimate cached by the latest ``forward()`` call.

        Raises:
            AssertionError: if ``forward()`` has not been called yet.
        """
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

In [4]:
# Register MyModelClass under the name that the trainer config's
# "custom_model" entry refers to ("my_torch_model").
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [5]:
class VisualObservationWrapper(ObsWrapper):
    """Observation wrapper exposing ``pov``, ``inventory``, ``compass`` and,
    optionally, the task's ``target_grid``.

    Args:
        env: environment to wrap.
        include_target: when true, ``target_grid`` is declared in the
            observation space and included in every observation. When false
            it is left out of both, so returned observations always match the
            declared space.
    """

    def __init__(self, env, include_target=False):
        super().__init__(env)
        self.include_target = include_target
        spaces = {
            'pov': gym.spaces.Box(low=0, high=255, shape=(64, 64, 3)),
            'inventory': gym.spaces.Box(low=0.0, high=20.0, shape=(6,)),
            'compass': gym.spaces.Box(low=-180.0, high=180.0, shape=(1,))
        }
        if include_target:
            spaces['target_grid'] = \
                gym.spaces.Box(low=0, high=6, shape=(9, 11, 11))
        self.observation_space = gym.spaces.Dict(spaces)

    def observation(self, obs, reward=None, done=None, info=None):
        """Convert a raw observation into the dict declared by ``observation_space``.

        The target grid is taken from ``info['target_grid']`` when present (the
        key is removed from ``info``). If ``info`` is given without that key,
        the situation is logged, the unwrapped env is asked to reset when it
        offers ``should_reset``, and the current task's grid is used. With no
        ``info`` at all the current task's grid is used directly.

        Args:
            obs: raw observation; ``obs['pov']``, ``obs['inventory']`` and
                ``obs['compass']['angle']`` are read.
            reward: unused.
            done: unused.
            info: optional step info dict; may be modified as described above.

        Returns:
            dict with ``pov`` (float32), ``inventory``, ``compass`` (float32
            array of length 1) and, if ``include_target`` was set,
            ``target_grid``.
        """
        if info is not None and 'target_grid' in info:
            target_grid = info.pop('target_grid')
        else:
            if info is not None:
                logger.error(f'info: {info}')
                if hasattr(self.unwrapped, 'should_reset'):
                    self.unwrapped.should_reset(True)
            target_grid = self.env.unwrapped.tasks.current.target_grid

        observation = {
            'pov': obs['pov'].astype(np.float32),
            'inventory': obs['inventory'],
            # Cast explicitly, as is already done for 'pov'.
            # NOTE(review): presumably matches gym's Box default dtype of float32 - confirm.
            'compass': np.array([obs['compass']['angle'].item()], dtype=np.float32),
        }
        # Previously 'target_grid' was always returned, even when it had not
        # been declared in the observation space.
        if self.include_target:
            observation['target_grid'] = target_grid
        return observation

In [6]:
import os
# Order CUDA devices by PCI bus id so that the index selected below is stable.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
# Expose only device 0 to this process.
# NOTE(review): presumably this has to run before CUDA is first initialised to take effect - confirm.
os.environ["CUDA_VISIBLE_DEVICES"]="0"

class RewardWrapper(gym.RewardWrapper):
    """Reward wrapper that turns a zero reward into a penalty of -0.01."""

    def __init__(self, env):
        super().__init__(env)

    def reward(self, rew):
        """Return ``-0.01`` if ``rew`` equals zero, otherwise ``rew`` unchanged."""
        return -0.01 if rew == 0 else rew
    
def env_creator(env_config):
    """Build the wrapped 'IGLUSilentBuilder-v0' environment used for training.

    Args:
        env_config: optional mapping of settings. Recognised keys, each with a
            default equal to the previously hard-coded value:
            ``'max_steps'`` (default ``1000``), passed to ``gym.make``, and
            ``'tasks'`` (default ``['C3', 'C17', 'C32']``), the task preset.
            ``None`` or an empty mapping gives the original behaviour.

    Returns:
        The environment wrapped, in order, by ``VisualObservationWrapper``
        (with the target grid included), ``SelectAndPlace``,
        ``Discretization`` over the 'human-level' flat action space, and
        ``RewardWrapper``.
    """
    settings = env_config or {}
    max_steps = settings.get('max_steps', 1000)
    tasks = list(settings.get('tasks', ['C3', 'C17', 'C32']))

    env = gym.make('IGLUSilentBuilder-v0', max_steps=max_steps)
    env.update_taskset(TaskSet(preset=tasks))
    env = VisualObservationWrapper(env, include_target=True)
    env = SelectAndPlace(env)
    env = Discretization(env, flat_action_space('human-level'))
    env = RewardWrapper(env)
    return env

from ray.tune.registry import register_env
# Register env_creator under the id "my_env", the value used for "env" in the trainer config.
register_env("my_env", env_creator)

from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

In [None]:
from ray.tune.integration.wandb import WandbLogger

# Launch PPO training through Ray Tune on the registered "my_env" environment
# with the registered "my_torch_model" model.
# NOTE(review): no `stop` criterion is passed, so presumably the run continues
# until it is interrupted - confirm this is intended.
analysis = tune.run(PPOTrainer, 
         config={
             "env": "my_env", 
             "framework": "torch",
             "num_gpus": 1,
             "num_workers": 1,
             "sgd_minibatch_size": 256,
             "clip_param": 0.2,
             "entropy_coeff": 0.01,
             "lambda": 0.95,
             "train_batch_size": 1000,
             #"gamma": 0.99,
             "model": {
                    # Specify our custom model from above.
                    "custom_model": "my_torch_model",
                    # Extra kwargs to be passed to your model's c'tor.
                    "custom_model_config": {},
              },
             # Settings read by the W&B logger passed in `loggers` below.
             "logger_config": {
                  "wandb": {
                      "project": "IGLU-Minecraft",
                      "name": "PPO MultiTask (C3, C17, C32) pretrained (AngelaCNN) (3 noops after placement) r: -0.01"
                  }
              }

        },
        loggers=[WandbLogger],
        # Results and checkpoints are written under this absolute directory.
        local_dir="/IGLU-Minecraft/checkpoints/",
        # Checkpoint every 5 iterations and once more at the end.
        # NOTE(review): keep_checkpoints_num presumably caps how many checkpoints are retained - confirm.
        keep_checkpoints_num=50,
        checkpoint_freq=5,
        checkpoint_at_end=True)



Trial name,status,loc
PPO_my_env_549f0_00000,PENDING,


2021-11-05 12:31:42,213	INFO wandb.py:170 -- Already logged into W&B.
2021-11-05 12:31:42,226	ERROR syncer.py:72 -- Log sync requires rsync to be installed.
[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.6 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2m[36m(pid=307252)[0m 2021-11-05 12:31:45,655	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=307252)[0m 2021-11-05 12:31:45,655	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1000
  custom_metrics: {}
  date: 2021-11-05_12-32-52
  done: false
  episode_len_mean: 393.0
  episode_media: {}
  episode_reward_max: -7.769999999999953
  episode_reward_mean: -14.065000000000035
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 2
  episodes_total: 2
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.882830344306098
          entropy_coeff: 0.009999999999999998
          kl: 0.00782593100867451
          policy_loss: 0.11407342735264037
          total_loss: 0.5807270208166705
          vf_explained_var: 0.2950565218925476
          vf_loss: 0.4939167136947314
    num_agent_steps_sampled: 1000
    num_agent_steps_trained: 1000
    num_steps_sampled: 1000
    num_steps_trained: 1000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1,61.0187,1000,-14.065,-7.77,-20.36,393


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 2000
  custom_metrics: {}
  date: 2021-11-05_12-33-12
  done: false
  episode_len_mean: 402.25
  episode_media: {}
  episode_reward_max: -4.109999999999957
  episode_reward_mean: -9.089999999999996
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 2
  episodes_total: 4
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8729226801130507
          entropy_coeff: 0.009999999999999998
          kl: 0.008935499472480816
          policy_loss: -0.06010568820767932
          total_loss: -0.07399718142631981
          vf_explained_var: 0.4425891935825348
          vf_loss: 0.013050633063539863
    num_agent_steps_sampled: 2000
    num_agent_steps_trained: 2000
    num_steps_sampled: 2000
    num_steps_trained: 2000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,2,81.3515,2000,-9.09,-4.11,-20.36,402.25


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 3000
  custom_metrics: {}
  date: 2021-11-05_12-33-33
  done: false
  episode_len_mean: 402.85714285714283
  episode_media: {}
  episode_reward_max: -3.8699999999999615
  episode_reward_mean: -8.134285714285692
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 3
  episodes_total: 7
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8641971561643813
          entropy_coeff: 0.009999999999999998
          kl: 0.010877699147854544
          policy_loss: 0.03254767855008443
          total_loss: 0.2831236344244745
          vf_explained_var: -0.006795969326049089
          vf_loss: 0.2770423868050178
    num_agent_steps_sampled: 3000
    num_agent_steps_trained: 3000
    num_steps_sampled: 3000
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,3,102.214,3000,-8.13429,-3.87,-20.36,402.857


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 4000
  custom_metrics: {}
  date: 2021-11-05_12-33-52
  done: false
  episode_len_mean: 404.6666666666667
  episode_media: {}
  episode_reward_max: -3.8699999999999615
  episode_reward_mean: -7.239999999999973
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 2
  episodes_total: 9
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8588473346498278
          entropy_coeff: 0.009999999999999998
          kl: 0.009051222671889016
          policy_loss: -0.07592426364620526
          total_loss: -0.08197728503081533
          vf_explained_var: 0.5241923928260803
          vf_loss: 0.02072521210130718
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_steps_sampled: 4000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,4,121.036,4000,-7.24,-3.87,-20.36,404.667


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 5000
  custom_metrics: {}
  date: 2021-11-05_12-34-12
  done: false
  episode_len_mean: 404.6666666666667
  episode_media: {}
  episode_reward_max: -3.8699999999999615
  episode_reward_mean: -7.000833333333303
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 3
  episodes_total: 12
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8527900483873156
          entropy_coeff: 0.009999999999999998
          kl: 0.008160790990485485
          policy_loss: 0.03313031088974741
          total_loss: 0.31359351025894283
          vf_explained_var: -0.3932191729545593
          vf_loss: 0.3073589350200362
    num_agent_steps_sampled: 5000
    num_agent_steps_trained: 5000
    num_steps_sampled: 5000
    num_steps_trained: 50

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,5,140.858,5000,-7.00083,-3.87,-20.36,404.667


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 6000
  custom_metrics: {}
  date: 2021-11-05_12-34-31
  done: false
  episode_len_mean: 406.07142857142856
  episode_media: {}
  episode_reward_max: -3.8699999999999615
  episode_reward_mean: -6.592857142857111
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 2
  episodes_total: 14
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8388897869322034
          entropy_coeff: 0.009999999999999998
          kl: 0.008260270009552428
          policy_loss: 0.040926118360625374
          total_loss: 0.0417736561761962
          vf_explained_var: -0.2226366400718689
          vf_loss: 0.027584380594392617
    num_agent_steps_sampled: 6000
    num_agent_steps_trained: 6000
    num_steps_sampled: 6000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,6,160.265,6000,-6.59286,-3.87,-20.36,406.071


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 7000
  custom_metrics: {}
  date: 2021-11-05_12-34-49
  done: false
  episode_len_mean: 407.47058823529414
  episode_media: {}
  episode_reward_max: -3.8699999999999615
  episode_reward_mean: -6.1599999999999655
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 3
  episodes_total: 17
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8188014692730374
          entropy_coeff: 0.009999999999999998
          kl: 0.00953898321457405
          policy_loss: -0.040750801811615625
          total_loss: -0.0562101360824373
          vf_explained_var: 0.4826466739177704
          vf_loss: 0.010820883367624547
    num_agent_steps_sampled: 7000
    num_agent_steps_trained: 7000
    num_steps_sampled: 7000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,7,178.458,7000,-6.16,-3.87,-20.36,407.471


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 8000
  custom_metrics: {}
  date: 2021-11-05_12-35-09
  done: false
  episode_len_mean: 405.6842105263158
  episode_media: {}
  episode_reward_max: -3.8699999999999615
  episode_reward_mean: -5.9226315789473345
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 2
  episodes_total: 19
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.811685175365872
          entropy_coeff: 0.009999999999999998
          kl: 0.008390322766050515
          policy_loss: -0.12449428306685553
          total_loss: -0.1413905820912785
          vf_explained_var: 0.3833427131175995
          vf_loss: 0.00954249018492798
    num_agent_steps_sampled: 8000
    num_agent_steps_trained: 8000
    num_steps_sampled: 8000
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,8,197.644,8000,-5.92263,-3.87,-20.36,405.684


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 9000
  custom_metrics: {}
  date: 2021-11-05_12-35-28
  done: false
  episode_len_mean: 404.1818181818182
  episode_media: {}
  episode_reward_max: -3.8099999999999627
  episode_reward_mean: -5.653181818181783
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 3
  episodes_total: 22
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7942037211524116
          entropy_coeff: 0.009999999999999998
          kl: 0.009598330017711263
          policy_loss: -0.01459073887930976
          total_loss: -0.028183005584610833
          vf_explained_var: 0.4604661166667938
          vf_loss: 0.012430104344255394
    num_agent_steps_sampled: 9000
    num_agent_steps_trained: 9000
    num_steps_sampled: 9000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,9,217.058,9000,-5.65318,-3.81,-20.36,404.182


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 10000
  custom_metrics: {}
  date: 2021-11-05_12-35-46
  done: false
  episode_len_mean: 405.375
  episode_media: {}
  episode_reward_max: -3.8099999999999627
  episode_reward_mean: -5.530833333333297
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 2
  episodes_total: 24
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8012418111165363
          entropy_coeff: 0.009999999999999998
          kl: 0.009571361675313316
          policy_loss: -0.02471850568221675
          total_loss: -0.04582095986439122
          vf_explained_var: 0.8440366387367249
          vf_loss: 0.0049956919237350425
    num_agent_steps_sampled: 10000
    num_agent_steps_trained: 10000
    num_steps_sampled: 10000
    num_steps_trained: 1000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,10,235.165,10000,-5.53083,-3.81,-20.36,405.375


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 11000
  custom_metrics: {}
  date: 2021-11-05_12-36-04
  done: false
  episode_len_mean: 406.5
  episode_media: {}
  episode_reward_max: -3.8099999999999627
  episode_reward_mean: -5.428461538461501
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 2
  episodes_total: 26
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7839134772618612
          entropy_coeff: 0.009999999999999998
          kl: 0.00962585272524601
          policy_loss: -0.06668154199918111
          total_loss: -0.08442996624443266
          vf_explained_var: 0.40234965085983276
          vf_loss: 0.008165539695053466
    num_agent_steps_sampled: 11000
    num_agent_steps_trained: 11000
    num_steps_sampled: 11000
    num_steps_trained: 11000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,11,252.806,11000,-5.42846,-3.81,-20.36,406.5


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 12000
  custom_metrics: {}
  date: 2021-11-05_12-36-23
  done: false
  episode_len_mean: 406.62068965517244
  episode_media: {}
  episode_reward_max: -3.8099999999999627
  episode_reward_mean: -5.288620689655135
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 3
  episodes_total: 29
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7808816300498114
          entropy_coeff: 0.009999999999999998
          kl: 0.006565106224834515
          policy_loss: 0.10026372493141228
          total_loss: 0.07747692305387723
          vf_explained_var: 0.6784605383872986
          vf_loss: 0.003708992581555827
    num_agent_steps_sampled: 12000
    num_agent_steps_trained: 12000
    num_steps_sampled: 12000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,12,271.939,12000,-5.28862,-3.81,-20.36,406.621




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 13000
  custom_metrics: {}
  date: 2021-11-05_12-37-01
  done: false
  episode_len_mean: 405.96875
  episode_media: {}
  episode_reward_max: -3.45999999999997
  episode_reward_mean: -5.167499999999962
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 3
  episodes_total: 32
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7695451127158273
          entropy_coeff: 0.009999999999999998
          kl: 0.009334670375648196
          policy_loss: -0.017750290201769936
          total_loss: -0.02830542739894655
          vf_explained_var: 0.1521533727645874
          vf_loss: 0.015273377366570962
    num_agent_steps_sampled: 13000
    num_agent_steps_trained: 13000
    num_steps_sampled: 13000
    num_steps_trained: 1300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,13,309.897,13000,-5.1675,-3.46,-20.36,405.969


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 14000
  custom_metrics: {}
  date: 2021-11-05_12-37-20
  done: false
  episode_len_mean: 407.55882352941177
  episode_media: {}
  episode_reward_max: -3.45999999999997
  episode_reward_mean: -5.118235294117609
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 2
  episodes_total: 34
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.770231337017483
          entropy_coeff: 0.009999999999999998
          kl: 0.010483193081904218
          policy_loss: 0.10341601338651445
          total_loss: 0.08056323362721338
          vf_explained_var: 0.45135366916656494
          vf_loss: 0.002752896410270801
    num_agent_steps_sampled: 14000
    num_agent_steps_trained: 14000
    num_steps_sampled: 14000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,14,328.32,14000,-5.11824,-3.46,-20.36,407.559


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 15000
  custom_metrics: {}
  date: 2021-11-05_12-37-39
  done: false
  episode_len_mean: 407.0
  episode_media: {}
  episode_reward_max: -3.45999999999997
  episode_reward_mean: -5.054722222222184
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 2
  episodes_total: 36
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.762281862894694
          entropy_coeff: 0.009999999999999998
          kl: 0.011339825895844294
          policy_loss: -0.0826113318403562
          total_loss: -0.10114162191748619
          vf_explained_var: 0.7471698522567749
          vf_loss: 0.006824558348550151
    num_agent_steps_sampled: 15000
    num_agent_steps_trained: 15000
    num_steps_sampled: 15000
    num_steps_trained: 15000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,15,347.593,15000,-5.05472,-3.46,-20.36,407


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2021-11-05_12-37-58
  done: false
  episode_len_mean: 405.64102564102564
  episode_media: {}
  episode_reward_max: -3.45999999999997
  episode_reward_mean: -4.965384615384577
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 3
  episodes_total: 39
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.707990558942159
          entropy_coeff: 0.009999999999999998
          kl: 0.011465602866838242
          policy_loss: 0.007928958121273253
          total_loss: -0.00996715666519271
          vf_explained_var: 0.7416249513626099
          vf_loss: 0.006890668044798076
    num_agent_steps_sampled: 16000
    num_agent_steps_trained: 16000
    num_steps_sampled: 16000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,16,366.77,16000,-4.96538,-3.46,-20.36,405.641


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 17000
  custom_metrics: {}
  date: 2021-11-05_12-38-18
  done: false
  episode_len_mean: 402.14285714285717
  episode_media: {}
  episode_reward_max: -3.45999999999997
  episode_reward_mean: -4.865476190476152
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 3
  episodes_total: 42
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6727495511372883
          entropy_coeff: 0.009999999999999998
          kl: 0.009320035053338671
          policy_loss: 0.024449416249990464
          total_loss: 0.005339572827021281
          vf_explained_var: 0.7770556807518005
          vf_loss: 0.005753645476781659
    num_agent_steps_sampled: 17000
    num_agent_steps_trained: 17000
    num_steps_sampled: 17000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,17,387.041,17000,-4.86548,-3.46,-20.36,402.143


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 18000
  custom_metrics: {}
  date: 2021-11-05_12-38-38
  done: false
  episode_len_mean: 398.6666666666667
  episode_media: {}
  episode_reward_max: -3.419999999999971
  episode_reward_mean: -4.774444444444407
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 3
  episodes_total: 45
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.656010871463352
          entropy_coeff: 0.009999999999999998
          kl: 0.01178384205432334
          policy_loss: -0.03135284028119511
          total_loss: -0.04919236260983679
          vf_explained_var: 0.7401263117790222
          vf_loss: 0.00636381761610715
    num_agent_steps_sampled: 18000
    num_agent_steps_trained: 18000
    num_steps_sampled: 18000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,18,406.912,18000,-4.77444,-3.42,-20.36,398.667


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 19000
  custom_metrics: {}
  date: 2021-11-05_12-38-58
  done: false
  episode_len_mean: 395.4583333333333
  episode_media: {}
  episode_reward_max: -3.2999999999999736
  episode_reward_mean: -4.693124999999963
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 3
  episodes_total: 48
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5806274149152966
          entropy_coeff: 0.009999999999999998
          kl: 0.011672429415221054
          policy_loss: -0.021052212516466776
          total_loss: -0.03741184108787113
          vf_explained_var: 0.6626792550086975
          vf_loss: 0.007112163267770989
    num_agent_steps_sampled: 19000
    num_agent_steps_trained: 19000
    num_steps_sampled: 19000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,19,426.425,19000,-4.69312,-3.3,-20.36,395.458


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-11-05_12-39-18
  done: false
  episode_len_mean: 393.98
  episode_media: {}
  episode_reward_max: -3.2999999999999736
  episode_reward_mean: -4.648799999999963
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 2
  episodes_total: 50
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5565699206458197
          entropy_coeff: 0.009999999999999998
          kl: 0.011359630630111027
          policy_loss: -0.12598765426211886
          total_loss: -0.14198883954021665
          vf_explained_var: 0.6699394583702087
          vf_loss: 0.0072925887607400205
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_steps_sampled: 20000
    num_steps_trained: 20000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,20,446.249,20000,-4.6488,-3.3,-20.36,393.98


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 21000
  custom_metrics: {}
  date: 2021-11-05_12-39-37
  done: false
  episode_len_mean: 391.7358490566038
  episode_media: {}
  episode_reward_max: -3.2999999999999736
  episode_reward_mean: -4.586226415094303
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 3
  episodes_total: 53
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.457754145728217
          entropy_coeff: 0.009999999999999998
          kl: 0.010191735251478798
          policy_loss: -0.06624039792352253
          total_loss: -0.07991743592752351
          vf_explained_var: 0.40933993458747864
          vf_loss: 0.008862158368962507
    num_agent_steps_sampled: 21000
    num_agent_steps_trained: 21000
    num_steps_sampled: 21000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,21,465.123,21000,-4.58623,-3.3,-20.36,391.736


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 22000
  custom_metrics: {}
  date: 2021-11-05_12-39-55
  done: false
  episode_len_mean: 389.51785714285717
  episode_media: {}
  episode_reward_max: -3.2999999999999736
  episode_reward_mean: -4.52821428571425
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 3
  episodes_total: 56
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.4113137986924915
          entropy_coeff: 0.009999999999999998
          kl: 0.009989629969730391
          policy_loss: 0.011115433606836532
          total_loss: -0.0016575051678551567
          vf_explained_var: 0.06734275072813034
          vf_loss: 0.00934227196396225
    num_agent_steps_sampled: 22000
    num_agent_steps_trained: 22000
    num_steps_sampled: 22000
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,22,484.022,22000,-4.52821,-3.3,-20.36,389.518


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 23000
  custom_metrics: {}
  date: 2021-11-05_12-40-17
  done: false
  episode_len_mean: 385.64406779661016
  episode_media: {}
  episode_reward_max: -3.049999999999979
  episode_reward_mean: -4.457288135593184
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 3
  episodes_total: 59
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.3602216323216756
          entropy_coeff: 0.009999999999999998
          kl: 0.011276595054112137
          policy_loss: -0.10900493918193711
          total_loss: -0.11566324507196744
          vf_explained_var: 0.16704466938972473
          vf_loss: 0.014688591690113146
    num_agent_steps_sampled: 23000
    num_agent_steps_trained: 23000
    num_steps_sampled: 23000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,23,505.116,23000,-4.45729,-3.05,-20.36,385.644




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 24000
  custom_metrics: {}
  date: 2021-11-05_12-40-54
  done: false
  episode_len_mean: 382.16129032258067
  episode_media: {}
  episode_reward_max: -2.679999999999987
  episode_reward_mean: -4.393387096774158
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 3
  episodes_total: 62
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.313277636633979
          entropy_coeff: 0.009999999999999998
          kl: 0.01010654412783596
          policy_loss: -0.1338150593969557
          total_loss: -0.14109042328264979
          vf_explained_var: 0.24628783762454987
          vf_loss: 0.013836104391763608
    num_agent_steps_sampled: 24000
    num_agent_steps_trained: 24000
    num_steps_sampled: 24000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,24,542.05,24000,-4.39339,-2.68,-20.36,382.161


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 25000
  custom_metrics: {}
  date: 2021-11-05_12-41-14
  done: false
  episode_len_mean: 378.5
  episode_media: {}
  episode_reward_max: -2.679999999999987
  episode_reward_mean: -4.3221212121211785
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 4
  episodes_total: 66
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.2366691403918795
          entropy_coeff: 0.009999999999999998
          kl: 0.0111124268291339
          policy_loss: -0.015328313700026935
          total_loss: -0.022150740772485734
          vf_explained_var: 0.250299334526062
          vf_loss: 0.013321777888470226
    num_agent_steps_sampled: 25000
    num_agent_steps_trained: 25000
    num_steps_sampled: 25000
    num_steps_trained: 25000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,25,562.443,25000,-4.32212,-2.68,-20.36,378.5


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 26000
  custom_metrics: {}
  date: 2021-11-05_12-41-35
  done: false
  episode_len_mean: 375.40579710144925
  episode_media: {}
  episode_reward_max: -2.679999999999987
  episode_reward_mean: -4.2678260869564895
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 3
  episodes_total: 69
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.221575631035699
          entropy_coeff: 0.009999999999999998
          kl: 0.005837017675918608
          policy_loss: 0.08990898662143283
          total_loss: 0.07616008006864124
          vf_explained_var: 0.3489055931568146
          vf_loss: 0.007299443699432433
    num_agent_steps_sampled: 26000
    num_agent_steps_trained: 26000
    num_steps_sampled: 26000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,26,583.309,26000,-4.26783,-2.68,-20.36,375.406


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 27000
  custom_metrics: {}
  date: 2021-11-05_12-41-56
  done: false
  episode_len_mean: 372.31944444444446
  episode_media: {}
  episode_reward_max: -2.679999999999987
  episode_reward_mean: -4.215555555555522
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 3
  episodes_total: 72
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.1426509459813436
          entropy_coeff: 0.009999999999999998
          kl: 0.013454339247326412
          policy_loss: -0.004865548676914639
          total_loss: -0.012880919873714447
          vf_explained_var: 0.2237168252468109
          vf_loss: 0.01072026797466808
    num_agent_steps_sampled: 27000
    num_agent_steps_trained: 27000
    num_steps_sampled: 27000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,27,604.61,27000,-4.21556,-2.68,-20.36,372.319


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 28000
  custom_metrics: {}
  date: 2021-11-05_12-42-17
  done: false
  episode_len_mean: 369.62666666666667
  episode_media: {}
  episode_reward_max: -2.679999999999987
  episode_reward_mean: -4.1689333333333005
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 3
  episodes_total: 75
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.0925032483206856
          entropy_coeff: 0.009999999999999998
          kl: 0.009834614028332883
          policy_loss: -0.1087154358625412
          total_loss: -0.11246394291520119
          vf_explained_var: 0.17601707577705383
          vf_loss: 0.015209603081974719
    num_agent_steps_sampled: 28000
    num_agent_steps_trained: 28000
    num_steps_sampled: 28000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,28,625.734,28000,-4.16893,-2.68,-20.36,369.627


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 29000
  custom_metrics: {}
  date: 2021-11-05_12-42-39
  done: false
  episode_len_mean: 366.1518987341772
  episode_media: {}
  episode_reward_max: -2.679999999999987
  episode_reward_mean: -4.110253164556929
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 4
  episodes_total: 79
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.084726007779439
          entropy_coeff: 0.009999999999999998
          kl: 0.008586522364585254
          policy_loss: 0.009296618981493844
          total_loss: 0.005862432097395261
          vf_explained_var: 0.24877777695655823
          vf_loss: 0.01569576966058877
    num_agent_steps_sampled: 29000
    num_agent_steps_trained: 29000
    num_steps_sampled: 29000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,29,647.218,29000,-4.11025,-2.68,-20.36,366.152


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 30000
  custom_metrics: {}
  date: 2021-11-05_12-43-00
  done: false
  episode_len_mean: 363.7439024390244
  episode_media: {}
  episode_reward_max: -2.679999999999987
  episode_reward_mean: -4.069756097560944
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 3
  episodes_total: 82
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.164926528930664
          entropy_coeff: 0.009999999999999998
          kl: 0.007336369882177528
          policy_loss: 0.03696915027168062
          total_loss: 0.02733838889333937
          vf_explained_var: 0.4109404385089874
          vf_loss: 0.010551228658813569
    num_agent_steps_sampled: 30000
    num_agent_steps_trained: 30000
    num_steps_sampled: 30000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,30,668.798,30000,-4.06976,-2.68,-20.36,363.744


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 31000
  custom_metrics: {}
  date: 2021-11-05_12-43-22
  done: false
  episode_len_mean: 361.91764705882355
  episode_media: {}
  episode_reward_max: -2.679999999999987
  episode_reward_mean: -4.036235294117616
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 3
  episodes_total: 85
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.1150487449434068
          entropy_coeff: 0.009999999999999998
          kl: 0.008275620333352407
          policy_loss: -0.09874523116482628
          total_loss: -0.10385881215333939
          vf_explained_var: 0.27143755555152893
          vf_loss: 0.014381779823452234
    num_agent_steps_sampled: 31000
    num_agent_steps_trained: 31000
    num_steps_sampled: 31000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,31,690.585,31000,-4.03624,-2.68,-20.36,361.918


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 32000
  custom_metrics: {}
  date: 2021-11-05_12-43-44
  done: false
  episode_len_mean: 359.2696629213483
  episode_media: {}
  episode_reward_max: -2.679999999999987
  episode_reward_mean: -3.9910112359550256
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 4
  episodes_total: 89
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.0949324289957683
          entropy_coeff: 0.009999999999999998
          kl: 0.010764761937794019
          policy_loss: -0.011421014120181401
          total_loss: -0.017406768889890775
          vf_explained_var: 0.3706802725791931
          vf_loss: 0.01281061764392588
    num_agent_steps_sampled: 32000
    num_agent_steps_trained: 32000
    num_steps_sampled: 32000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,32,711.826,32000,-3.99101,-2.68,-20.36,359.27




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 33000
  custom_metrics: {}
  date: 2021-11-05_12-44-22
  done: false
  episode_len_mean: 357.1847826086956
  episode_media: {}
  episode_reward_max: -2.6399999999999877
  episode_reward_mean: -3.957173913043448
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 3
  episodes_total: 92
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.093770424524943
          entropy_coeff: 0.009999999999999998
          kl: 0.00566456945284328
          policy_loss: 0.04190777755445904
          total_loss: 0.03388593610790041
          vf_explained_var: 0.4969539940357208
          vf_loss: 0.011782944665497376
    num_agent_steps_sampled: 33000
    num_agent_steps_trained: 33000
    num_steps_sampled: 33000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,33,750.234,33000,-3.95717,-2.64,-20.36,357.185


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 34000
  custom_metrics: {}
  date: 2021-11-05_12-44-43
  done: false
  episode_len_mean: 355.58947368421053
  episode_media: {}
  episode_reward_max: -2.6399999999999877
  episode_reward_mean: -3.9290526315789167
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 3
  episodes_total: 95
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.113132545683119
          entropy_coeff: 0.009999999999999998
          kl: 0.007464255510599137
          policy_loss: -0.003293868237071567
          total_loss: -0.012736073964171939
          vf_explained_var: 0.34183329343795776
          vf_loss: 0.010196272873630126
    num_agent_steps_sampled: 34000
    num_agent_steps_trained: 34000
    num_steps_sampled: 34000
    num_steps_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,34,771.266,34000,-3.92905,-2.64,-20.36,355.589


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 35000
  custom_metrics: {}
  date: 2021-11-05_12-45-04
  done: false
  episode_len_mean: 354.11224489795916
  episode_media: {}
  episode_reward_max: -2.6399999999999877
  episode_reward_mean: -3.902857142857113
  episode_reward_min: -20.360000000000117
  episodes_this_iter: 3
  episodes_total: 98
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.07384991645813
          entropy_coeff: 0.009999999999999998
          kl: 0.011153487035546316
          policy_loss: -0.1013819310400221
          total_loss: -0.10878999167018466
          vf_explained_var: 0.36857476830482483
          vf_loss: 0.01109973499034014
    num_agent_steps_sampled: 35000
    num_agent_steps_trained: 35000
    num_steps_sampled: 35000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,35,792.117,35000,-3.90286,-2.64,-20.36,354.112


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 36000
  custom_metrics: {}
  date: 2021-11-05_12-45-24
  done: false
  episode_len_mean: 351.64
  episode_media: {}
  episode_reward_max: -2.6399999999999877
  episode_reward_mean: -3.668199999999969
  episode_reward_min: -12.63999999999994
  episodes_this_iter: 4
  episodes_total: 102
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.078531575202942
          entropy_coeff: 0.009999999999999998
          kl: 0.01201153773660127
          policy_loss: 0.009343007538053725
          total_loss: 0.0026320403234826196
          vf_explained_var: 0.48136448860168457
          vf_loss: 0.011672035894460149
    num_agent_steps_sampled: 36000
    num_agent_steps_trained: 36000
    num_steps_sampled: 36000
    num_steps_trained: 36000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,36,812.561,36000,-3.6682,-2.64,-12.64,351.64


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 37000
  custom_metrics: {}
  date: 2021-11-05_12-45-45
  done: false
  episode_len_mean: 348.63
  episode_media: {}
  episode_reward_max: -2.6399999999999877
  episode_reward_mean: -3.6380999999999695
  episode_reward_min: -12.63999999999994
  episodes_this_iter: 3
  episodes_total: 105
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.040504088666704
          entropy_coeff: 0.009999999999999998
          kl: 0.0074570600869465365
          policy_loss: 0.04331095260050562
          total_loss: 0.03676211105452643
          vf_explained_var: 0.12136770039796829
          vf_loss: 0.012364785031725964
    num_agent_steps_sampled: 37000
    num_agent_steps_trained: 37000
    num_steps_sampled: 37000
    num_steps_trained: 37000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,37,833.335,37000,-3.6381,-2.64,-12.64,348.63


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 38000
  custom_metrics: {}
  date: 2021-11-05_12-46-06
  done: false
  episode_len_mean: 345.5
  episode_media: {}
  episode_reward_max: -2.6399999999999877
  episode_reward_mean: -3.52209999999997
  episode_reward_min: -10.619999999999967
  episodes_this_iter: 3
  episodes_total: 108
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9010114749272664
          entropy_coeff: 0.009999999999999998
          kl: 0.01152166114068272
          policy_loss: 0.02036845882733663
          total_loss: 0.016377534137831794
          vf_explained_var: 0.4265972673892975
          vf_loss: 0.012714859490127612
    num_agent_steps_sampled: 38000
    num_agent_steps_trained: 38000
    num_steps_sampled: 38000
    num_steps_trained: 38000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,38,854.599,38000,-3.5221,-2.64,-10.62,345.5


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 39000
  custom_metrics: {}
  date: 2021-11-05_12-46-28
  done: false
  episode_len_mean: 340.79
  episode_media: {}
  episode_reward_max: -2.6399999999999877
  episode_reward_mean: -3.4078999999999713
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 4
  episodes_total: 112
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9299324141608345
          entropy_coeff: 0.009999999999999998
          kl: 0.011333168836039045
          policy_loss: 0.013047667137450642
          total_loss: 0.009322765966256459
          vf_explained_var: 0.4762589633464813
          vf_loss: 0.013307786877784463
    num_agent_steps_sampled: 39000
    num_agent_steps_trained: 39000
    num_steps_sampled: 39000
    num_steps_trained: 39000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,39,876.504,39000,-3.4079,-2.64,-4.44,340.79


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2021-11-05_12-46-50
  done: false
  episode_len_mean: 337.3
  episode_media: {}
  episode_reward_max: -2.6399999999999877
  episode_reward_mean: -3.3729999999999722
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 3
  episodes_total: 115
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8985425326559278
          entropy_coeff: 0.009999999999999998
          kl: 0.010297292893747093
          policy_loss: 0.02324747774336073
          total_loss: 0.01550716327296363
          vf_explained_var: 0.3524312973022461
          vf_loss: 0.009185653836983774
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_steps_sampled: 40000
    num_steps_trained: 40000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,40,898.264,40000,-3.373,-2.64,-4.44,337.3


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 41000
  custom_metrics: {}
  date: 2021-11-05_12-47-12
  done: false
  episode_len_mean: 332.86
  episode_media: {}
  episode_reward_max: -2.6399999999999877
  episode_reward_mean: -3.3285999999999727
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 4
  episodes_total: 119
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8865081985791525
          entropy_coeff: 0.009999999999999998
          kl: 0.01102409143449651
          policy_loss: -0.034214231454663804
          total_loss: -0.03652898404333327
          vf_explained_var: 0.4984847605228424
          vf_loss: 0.0143455120217469
    num_agent_steps_sampled: 41000
    num_agent_steps_trained: 41000
    num_steps_sampled: 41000
    num_steps_trained: 41000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,41,919.704,41000,-3.3286,-2.64,-4.44,332.86




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 42000
  custom_metrics: {}
  date: 2021-11-05_12-47-51
  done: false
  episode_len_mean: 329.52
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.295199999999974
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 3
  episodes_total: 122
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.851734447479248
          entropy_coeff: 0.009999999999999998
          kl: 0.008576632284435151
          policy_loss: 0.0430338812371095
          total_loss: 0.03619584739208222
          vf_explained_var: 0.5432537198066711
          vf_loss: 0.009963984339265153
    num_agent_steps_sampled: 42000
    num_agent_steps_trained: 42000
    num_steps_sampled: 42000
    num_steps_trained: 42000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,42,959.388,42000,-3.2952,-2.49,-4.44,329.52


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 43000
  custom_metrics: {}
  date: 2021-11-05_12-48-13
  done: false
  episode_len_mean: 325.92
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.2591999999999746
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 3
  episodes_total: 125
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8186524762047662
          entropy_coeff: 0.009999999999999998
          kl: 0.008763741682328203
          policy_loss: -0.1056273785730203
          total_loss: -0.1112341416378816
          vf_explained_var: 0.5066964030265808
          vf_loss: 0.010827015423112445
    num_agent_steps_sampled: 43000
    num_agent_steps_trained: 43000
    num_steps_sampled: 43000
    num_steps_trained: 43000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,43,981.211,43000,-3.2592,-2.49,-4.44,325.92


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 44000
  custom_metrics: {}
  date: 2021-11-05_12-48-34
  done: false
  episode_len_mean: 321.45
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.214499999999976
  episode_reward_min: -4.43999999999995
  episodes_this_iter: 4
  episodes_total: 129
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.841858564482795
          entropy_coeff: 0.009999999999999998
          kl: 0.00920823243545788
          policy_loss: -0.008494078947438134
          total_loss: -0.01040429530872239
          vf_explained_var: 0.2663627862930298
          vf_loss: 0.014666721783578395
    num_agent_steps_sampled: 44000
    num_agent_steps_trained: 44000
    num_steps_sampled: 44000
    num_steps_trained: 44000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,44,1001.99,44000,-3.2145,-2.49,-4.44,321.45


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 45000
  custom_metrics: {}
  date: 2021-11-05_12-48-55
  done: false
  episode_len_mean: 318.47
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.1846999999999763
  episode_reward_min: -4.369999999999951
  episodes_this_iter: 3
  episodes_total: 132
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8713575230704413
          entropy_coeff: 0.009999999999999998
          kl: 0.007583568872045356
          policy_loss: 0.06024645318587621
          total_loss: 0.05371652096509934
          vf_explained_var: 0.4523247480392456
          vf_loss: 0.01066692824760038
    num_agent_steps_sampled: 45000
    num_agent_steps_trained: 45000
    num_steps_sampled: 45000
    num_steps_trained: 45000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,45,1023.2,45000,-3.1847,-2.49,-4.37,318.47


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 46000
  custom_metrics: {}
  date: 2021-11-05_12-49-16
  done: false
  episode_len_mean: 315.22
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.1521999999999766
  episode_reward_min: -3.9599999999999596
  episodes_this_iter: 3
  episodes_total: 135
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.817257449362013
          entropy_coeff: 0.009999999999999998
          kl: 0.00873416619211071
          policy_loss: -0.018163379033406576
          total_loss: -0.02246262513928943
          vf_explained_var: 0.06233019754290581
          vf_loss: 0.012126495094142027
    num_agent_steps_sampled: 46000
    num_agent_steps_trained: 46000
    num_steps_sampled: 46000
    num_steps_trained: 46000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,46,1044.05,46000,-3.1522,-2.49,-3.96,315.22


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 47000
  custom_metrics: {}
  date: 2021-11-05_12-49-36
  done: false
  episode_len_mean: 313.12
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.131199999999977
  episode_reward_min: -3.9199999999999604
  episodes_this_iter: 3
  episodes_total: 138
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7505854235755072
          entropy_coeff: 0.009999999999999998
          kl: 0.008725572679655416
          policy_loss: -0.11004642910427517
          total_loss: -0.11010883665747112
          vf_explained_var: 0.06200701370835304
          vf_loss: 0.015698332536137765
    num_agent_steps_sampled: 47000
    num_agent_steps_trained: 47000
    num_steps_sampled: 47000
    num_steps_trained: 47000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,47,1064.28,47000,-3.1312,-2.49,-3.92,313.12


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 48000
  custom_metrics: {}
  date: 2021-11-05_12-49-58
  done: false
  episode_len_mean: 310.53
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.1052999999999775
  episode_reward_min: -3.6899999999999653
  episodes_this_iter: 4
  episodes_total: 142
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.807555603981018
          entropy_coeff: 0.009999999999999998
          kl: 0.005762874653953137
          policy_loss: -0.001049985902176963
          total_loss: -0.0027275845408439637
          vf_explained_var: 0.08934637904167175
          vf_loss: 0.015245381877240208
    num_agent_steps_sampled: 48000
    num_agent_steps_trained: 48000
    num_steps_sampled: 48000
    num_steps_trained: 48

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,48,1085.39,48000,-3.1053,-2.49,-3.69,310.53


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 49000
  custom_metrics: {}
  date: 2021-11-05_12-50-19
  done: false
  episode_len_mean: 309.16
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.091599999999978
  episode_reward_min: -3.6899999999999653
  episodes_this_iter: 3
  episodes_total: 145
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.764299217859904
          entropy_coeff: 0.009999999999999998
          kl: 0.0076243574283943695
          policy_loss: 0.061773609618345895
          total_loss: 0.05612647400961982
          vf_explained_var: 0.2297632247209549
          vf_loss: 0.010470983351761888
    num_agent_steps_sampled: 49000
    num_agent_steps_trained: 49000
    num_steps_sampled: 49000
    num_steps_trained: 49000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,49,1106.38,49000,-3.0916,-2.49,-3.69,309.16


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 50000
  custom_metrics: {}
  date: 2021-11-05_12-50-40
  done: false
  episode_len_mean: 307.8
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.077999999999978
  episode_reward_min: -3.619999999999967
  episodes_this_iter: 3
  episodes_total: 148
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7123074253400168
          entropy_coeff: 0.009999999999999998
          kl: 0.008125454900878952
          policy_loss: -0.09465382239884801
          total_loss: -0.09643103149202135
          vf_explained_var: 0.052932992577552795
          vf_loss: 0.013720773564030727
    num_agent_steps_sampled: 50000
    num_agent_steps_trained: 50000
    num_steps_sampled: 50000
    num_steps_trained: 50000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,50,1127.58,50000,-3.078,-2.49,-3.62,307.8




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 51000
  custom_metrics: {}
  date: 2021-11-05_12-51-19
  done: false
  episode_len_mean: 305.05
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.0504999999999787
  episode_reward_min: -3.619999999999967
  episodes_this_iter: 4
  episodes_total: 152
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7127217345767551
          entropy_coeff: 0.009999999999999998
          kl: 0.00476401408646178
          policy_loss: 0.02092351358797815
          total_loss: 0.017861722078588275
          vf_explained_var: 0.13392861187458038
          vf_loss: 0.013112623234175974
    num_agent_steps_sampled: 51000
    num_agent_steps_trained: 51000
    num_steps_sampled: 51000
    num_steps_trained: 51000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,51,1166.54,51000,-3.0505,-2.49,-3.62,305.05


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 52000
  custom_metrics: {}
  date: 2021-11-05_12-51-41
  done: false
  episode_len_mean: 303.23
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.0322999999999793
  episode_reward_min: -3.5399999999999685
  episodes_this_iter: 3
  episodes_total: 155
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7010592328177558
          entropy_coeff: 0.009999999999999998
          kl: 0.015371481476811787
          policy_loss: -0.02388275952802764
          total_loss: -0.029263213276863098
          vf_explained_var: 0.24043706059455872
          vf_loss: 0.010092991737959285
    num_agent_steps_sampled: 52000
    num_agent_steps_trained: 52000
    num_steps_sampled: 52000
    num_steps_trained: 520

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,52,1188.42,52000,-3.0323,-2.49,-3.54,303.23


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 53000
  custom_metrics: {}
  date: 2021-11-05_12-52-02
  done: false
  episode_len_mean: 302.27
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.0226999999999795
  episode_reward_min: -3.45999999999997
  episodes_this_iter: 4
  episodes_total: 159
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5987839500109355
          entropy_coeff: 0.009999999999999998
          kl: 0.008005269584418566
          policy_loss: -0.005702673561043209
          total_loss: -0.007861726979414622
          vf_explained_var: 0.2758680284023285
          vf_loss: 0.013028258644044399
    num_agent_steps_sampled: 53000
    num_agent_steps_trained: 53000
    num_steps_sampled: 53000
    num_steps_trained: 53000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,53,1209.44,53000,-3.0227,-2.49,-3.46,302.27


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 54000
  custom_metrics: {}
  date: 2021-11-05_12-52-23
  done: false
  episode_len_mean: 301.67
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.0166999999999793
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 3
  episodes_total: 162
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.604651939868927
          entropy_coeff: 0.009999999999999998
          kl: 0.008465048755288822
          policy_loss: 0.038061997956699796
          total_loss: 0.03196888599130843
          vf_explained_var: 0.49064555764198303
          vf_loss: 0.009106904367864545
    num_agent_steps_sampled: 54000
    num_agent_steps_trained: 54000
    num_steps_sampled: 54000
    num_steps_trained: 54000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,54,1230.86,54000,-3.0167,-2.49,-3.32,301.67


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 55000
  custom_metrics: {}
  date: 2021-11-05_12-52-46
  done: false
  episode_len_mean: 300.02
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.0001999999999795
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 166
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5443214178085327
          entropy_coeff: 0.009999999999999998
          kl: 0.015870347662587273
          policy_loss: -0.01718266415927145
          total_loss: -0.017799381208088664
          vf_explained_var: 0.32240092754364014
          vf_loss: 0.013239461980346177
    num_agent_steps_sampled: 55000
    num_agent_steps_trained: 55000
    num_steps_sampled: 55000
    num_steps_trained: 5500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,55,1253.38,55000,-3.0002,-2.49,-3.32,300.02


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 56000
  custom_metrics: {}
  date: 2021-11-05_12-53-09
  done: false
  episode_len_mean: 298.94
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -2.9893999999999803
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 3
  episodes_total: 169
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5535834232966106
          entropy_coeff: 0.009999999999999998
          kl: 0.008797075830577306
          policy_loss: 0.016112927140461072
          total_loss: 0.011484138460622894
          vf_explained_var: 0.3315800428390503
          vf_loss: 0.010027336818166078
    num_agent_steps_sampled: 56000
    num_agent_steps_trained: 56000
    num_steps_sampled: 56000
    num_steps_trained: 56000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,56,1276.51,56000,-2.9894,-2.49,-3.32,298.94


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 57000
  custom_metrics: {}
  date: 2021-11-05_12-53-31
  done: false
  episode_len_mean: 297.81
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -2.97809999999998
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 173
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5393027901649474
          entropy_coeff: 0.009999999999999998
          kl: 0.004126589654754101
          policy_loss: 0.03014610509077708
          total_loss: 0.02964391741487715
          vf_explained_var: 0.23452207446098328
          vf_loss: 0.01447817984347542
    num_agent_steps_sampled: 57000
    num_agent_steps_trained: 57000
    num_steps_sampled: 57000
    num_steps_trained: 57000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,57,1299.01,57000,-2.9781,-2.49,-3.32,297.81


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 58000
  custom_metrics: {}
  date: 2021-11-05_12-53-55
  done: false
  episode_len_mean: 296.42
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -2.964199999999981
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 177
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.4979062808884516
          entropy_coeff: 0.009999999999999998
          kl: 0.01194247860014749
          policy_loss: -0.010760611005955273
          total_loss: -0.008190152131848864
          vf_explained_var: 0.05340703949332237
          vf_loss: 0.016952394073208175
    num_agent_steps_sampled: 58000
    num_agent_steps_trained: 58000
    num_steps_sampled: 58000
    num_steps_trained: 58000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,58,1322.31,58000,-2.9642,-2.49,-3.32,296.42




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 59000
  custom_metrics: {}
  date: 2021-11-05_12-54-36
  done: false
  episode_len_mean: 295.08
  episode_media: {}
  episode_reward_max: -2.3099999999999947
  episode_reward_mean: -2.950799999999981
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 3
  episodes_total: 180
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.534555987517039
          entropy_coeff: 0.009999999999999998
          kl: 0.011504547483395565
          policy_loss: -0.11230703923437331
          total_loss: -0.11092196802298228
          vf_explained_var: 0.09818857908248901
          vf_loss: 0.01615540273487568
    num_agent_steps_sampled: 59000
    num_agent_steps_trained: 59000
    num_steps_sampled: 59000
    num_steps_trained: 59000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,59,1363.45,59000,-2.9508,-2.31,-3.32,295.08


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2021-11-05_12-55-00
  done: false
  episode_len_mean: 293.5
  episode_media: {}
  episode_reward_max: -2.3099999999999947
  episode_reward_mean: -2.9349999999999814
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 184
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.4894011086887784
          entropy_coeff: 0.009999999999999998
          kl: 0.01204409086087694
          policy_loss: -0.000912954244348738
          total_loss: 0.001605608272883627
          vf_explained_var: 0.09882667660713196
          vf_loss: 0.016810368187725543
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_steps_sampled: 60000
    num_steps_trained: 60000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,60,1387.15,60000,-2.935,-2.31,-3.32,293.5


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 61000
  custom_metrics: {}
  date: 2021-11-05_12-55-22
  done: false
  episode_len_mean: 292.15
  episode_media: {}
  episode_reward_max: -2.3099999999999947
  episode_reward_mean: -2.9214999999999822
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 188
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.4317830787764656
          entropy_coeff: 0.009999999999999998
          kl: 0.009728076836880935
          policy_loss: 0.034387235881553756
          total_loss: 0.03506368402805593
          vf_explained_var: 0.05619039013981819
          vf_loss: 0.014507873294254145
    num_agent_steps_sampled: 61000
    num_agent_steps_trained: 61000
    num_steps_sampled: 61000
    num_steps_trained: 61000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,61,1409.95,61000,-2.9215,-2.31,-3.32,292.15


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 62000
  custom_metrics: {}
  date: 2021-11-05_12-55-45
  done: false
  episode_len_mean: 291.13
  episode_media: {}
  episode_reward_max: -2.3099999999999947
  episode_reward_mean: -2.9112999999999816
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 192
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.3906193481551277
          entropy_coeff: 0.009999999999999998
          kl: 0.0059034372113626115
          policy_loss: 0.0054064777162339955
          total_loss: 0.009356006483236949
          vf_explained_var: 0.03892297297716141
          vf_loss: 0.01756054868714677
    num_agent_steps_sampled: 62000
    num_agent_steps_trained: 62000
    num_steps_sampled: 62000
    num_steps_trained: 620

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,62,1432.57,62000,-2.9113,-2.31,-3.32,291.13


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 63000
  custom_metrics: {}
  date: 2021-11-05_12-56-09
  done: false
  episode_len_mean: 289.78
  episode_media: {}
  episode_reward_max: -2.3099999999999947
  episode_reward_mean: -2.8977999999999815
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 3
  episodes_total: 195
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.3572178151872423
          entropy_coeff: 0.009999999999999998
          kl: 0.010690243273487807
          policy_loss: -0.10552270693911446
          total_loss: -0.10178609366218248
          vf_explained_var: 0.07867462188005447
          vf_loss: 0.016774276602599357
    num_agent_steps_sampled: 63000
    num_agent_steps_trained: 63000
    num_steps_sampled: 63000
    num_steps_trained: 6300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,63,1456.21,63000,-2.8978,-2.31,-3.32,289.78


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 64000
  custom_metrics: {}
  date: 2021-11-05_12-56-33
  done: false
  episode_len_mean: 287.74
  episode_media: {}
  episode_reward_max: -2.3099999999999947
  episode_reward_mean: -2.8773999999999824
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 199
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.2757095019022624
          entropy_coeff: 0.009999999999999998
          kl: 0.002800309470139576
          policy_loss: -0.027333680540323257
          total_loss: -0.023641398466295666
          vf_explained_var: 0.10639787465333939
          vf_loss: 0.016309360487179625
    num_agent_steps_sampled: 64000
    num_agent_steps_trained: 64000
    num_steps_sampled: 64000
    num_steps_trained: 64

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,64,1480.24,64000,-2.8774,-2.31,-3.32,287.74


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 65000
  custom_metrics: {}
  date: 2021-11-05_12-56-57
  done: false
  episode_len_mean: 285.31
  episode_media: {}
  episode_reward_max: -2.3099999999999947
  episode_reward_mean: -2.8530999999999835
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 203
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.024999999999999994
          cur_lr: 5.000000000000001e-05
          entropy: 1.3706579592492845
          entropy_coeff: 0.009999999999999998
          kl: 0.011554900932880783
          policy_loss: -0.01720287071333991
          total_loss: -0.015589577621883816
          vf_explained_var: 0.2001771330833435
          vf_loss: 0.015030999678290552
    num_agent_steps_sampled: 65000
    num_agent_steps_trained: 65000
    num_steps_sampled: 65000
    num_steps_trained: 650

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,65,1504.62,65000,-2.8531,-2.31,-3.32,285.31


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 66000
  custom_metrics: {}
  date: 2021-11-05_12-57-21
  done: false
  episode_len_mean: 283.4
  episode_media: {}
  episode_reward_max: -2.3099999999999947
  episode_reward_mean: -2.8339999999999836
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 207
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.024999999999999994
          cur_lr: 5.000000000000001e-05
          entropy: 1.2921024216545953
          entropy_coeff: 0.009999999999999998
          kl: 0.011041168606989199
          policy_loss: -0.000416025353802575
          total_loss: 0.0023773383763101364
          vf_explained_var: 0.13591091334819794
          vf_loss: 0.01543836019312342
    num_agent_steps_sampled: 66000
    num_agent_steps_trained: 66000
    num_steps_sampled: 66000
    num_steps_trained: 660

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,66,1528.34,66000,-2.834,-2.31,-3.32,283.4




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 67000
  custom_metrics: {}
  date: 2021-11-05_12-58-04
  done: false
  episode_len_mean: 281.73
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.8172999999999844
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 211
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.024999999999999994
          cur_lr: 5.000000000000001e-05
          entropy: 1.3276541723145379
          entropy_coeff: 0.009999999999999998
          kl: 0.009762847782044363
          policy_loss: -0.0015446758932537502
          total_loss: 0.000822682347562578
          vf_explained_var: 0.11152283102273941
          vf_loss: 0.015399823741366466
    num_agent_steps_sampled: 67000
    num_agent_steps_trained: 67000
    num_steps_sampled: 67000
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,67,1570.9,67000,-2.8173,-2.18,-3.32,281.73


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 68000
  custom_metrics: {}
  date: 2021-11-05_12-58-28
  done: false
  episode_len_mean: 280.23
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.8022999999999842
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 215
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.024999999999999994
          cur_lr: 5.000000000000001e-05
          entropy: 1.3592947880427042
          entropy_coeff: 0.009999999999999998
          kl: 0.012272731601142084
          policy_loss: -0.00039409614271587794
          total_loss: 0.001961560919880867
          vf_explained_var: 0.08586733788251877
          vf_loss: 0.015641787534372675
    num_agent_steps_sampled: 68000
    num_agent_steps_trained: 68000
    num_steps_sampled: 68000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,68,1595.28,68000,-2.8023,-2.18,-3.32,280.23


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 69000
  custom_metrics: {}
  date: 2021-11-05_12-58-52
  done: false
  episode_len_mean: 278.87
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.788699999999985
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 219
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.024999999999999994
          cur_lr: 5.000000000000001e-05
          entropy: 1.4268960701094733
          entropy_coeff: 0.009999999999999998
          kl: 0.011495288895658855
          policy_loss: 0.015870516581667795
          total_loss: 0.016603588312864303
          vf_explained_var: 0.11676181852817535
          vf_loss: 0.01471464866772294
    num_agent_steps_sampled: 69000
    num_agent_steps_trained: 69000
    num_steps_sampled: 69000
    num_steps_trained: 69000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,69,1618.9,69000,-2.7887,-2.18,-3.32,278.87


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 70000
  custom_metrics: {}
  date: 2021-11-05_12-59-15
  done: false
  episode_len_mean: 278.11
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.7810999999999844
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 223
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.024999999999999994
          cur_lr: 5.000000000000001e-05
          entropy: 1.4085832489861383
          entropy_coeff: 0.009999999999999998
          kl: 0.012833098651030165
          policy_loss: 0.014019793934292264
          total_loss: 0.014587289426061842
          vf_explained_var: 0.10266479849815369
          vf_loss: 0.014332499355077744
    num_agent_steps_sampled: 70000
    num_agent_steps_trained: 70000
    num_steps_sampled: 70000
    num_steps_trained: 700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,70,1641.89,70000,-2.7811,-2.18,-3.32,278.11


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 71000
  custom_metrics: {}
  date: 2021-11-05_12-59-37
  done: false
  episode_len_mean: 277.56
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.7755999999999834
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 3
  episodes_total: 226
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.024999999999999994
          cur_lr: 5.000000000000001e-05
          entropy: 1.4903475006421407
          entropy_coeff: 0.009999999999999998
          kl: 0.00997356347325798
          policy_loss: -0.021662296768691806
          total_loss: -0.0258094502819909
          vf_explained_var: 0.07544451206922531
          vf_loss: 0.01050697927001036
    num_agent_steps_sampled: 71000
    num_agent_steps_trained: 71000
    num_steps_sampled: 71000
    num_steps_trained: 71000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,71,1664.72,71000,-2.7756,-2.18,-3.32,277.56


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 72000
  custom_metrics: {}
  date: 2021-11-05_13-00-00
  done: false
  episode_len_mean: 276.54
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.765399999999984
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 230
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.024999999999999994
          cur_lr: 5.000000000000001e-05
          entropy: 1.5360112508138022
          entropy_coeff: 0.009999999999999998
          kl: 0.010463615199157061
          policy_loss: 0.021543597305814424
          total_loss: 0.02094361906250318
          vf_explained_var: 0.05323716625571251
          vf_loss: 0.014498542062938213
    num_agent_steps_sampled: 72000
    num_agent_steps_trained: 72000
    num_steps_sampled: 72000
    num_steps_trained: 72000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,72,1686.78,72000,-2.7654,-2.18,-3.32,276.54


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 73000
  custom_metrics: {}
  date: 2021-11-05_13-00-23
  done: false
  episode_len_mean: 275.11
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.751099999999985
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 234
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.024999999999999994
          cur_lr: 5.000000000000001e-05
          entropy: 1.4754434771007963
          entropy_coeff: 0.009999999999999998
          kl: 0.010381205305647572
          policy_loss: 0.003849181201722887
          total_loss: 0.005389392955435647
          vf_explained_var: 0.07202484458684921
          vf_loss: 0.016035115046219694
    num_agent_steps_sampled: 73000
    num_agent_steps_trained: 73000
    num_steps_sampled: 73000
    num_steps_trained: 7300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,73,1709.75,73000,-2.7511,-2.18,-3.32,275.11


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 74000
  custom_metrics: {}
  date: 2021-11-05_13-00-46
  done: false
  episode_len_mean: 273.69
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.736899999999985
  episode_reward_min: -3.149999999999977
  episodes_this_iter: 3
  episodes_total: 237
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.024999999999999994
          cur_lr: 5.000000000000001e-05
          entropy: 1.4657073391808404
          entropy_coeff: 0.009999999999999998
          kl: 0.004026381197945028
          policy_loss: 0.020523937377664777
          total_loss: 0.016626187745067807
          vf_explained_var: 0.14881344139575958
          vf_loss: 0.010658664582297205
    num_agent_steps_sampled: 74000
    num_agent_steps_trained: 74000
    num_steps_sampled: 74000
    num_steps_trained: 7400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,74,1733.19,74000,-2.7369,-2.18,-3.15,273.69




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 75000
  custom_metrics: {}
  date: 2021-11-05_13-01-28
  done: false
  episode_len_mean: 271.62
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.716199999999986
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 4
  episodes_total: 241
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012499999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.397702850235833
          entropy_coeff: 0.009999999999999998
          kl: 0.010206596775319326
          policy_loss: -0.013614209327432845
          total_loss: -0.011950741170181169
          vf_explained_var: 0.11685723066329956
          vf_loss: 0.015512915255708828
    num_agent_steps_sampled: 75000
    num_agent_steps_trained: 75000
    num_steps_sampled: 75000
    num_steps_trained: 750

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,75,1775.34,75000,-2.7162,-2.18,-3.1,271.62


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 76000
  custom_metrics: {}
  date: 2021-11-05_13-01-53
  done: false
  episode_len_mean: 269.85
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.698499999999986
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 4
  episodes_total: 245
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012499999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.3018123984336853
          entropy_coeff: 0.009999999999999998
          kl: 0.012969560307996472
          policy_loss: 0.010295331395334667
          total_loss: 0.01257113731569714
          vf_explained_var: 0.10708354413509369
          vf_loss: 0.015131810545507404
    num_agent_steps_sampled: 76000
    num_agent_steps_trained: 76000
    num_steps_sampled: 76000
    num_steps_trained: 76000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,76,1799.82,76000,-2.6985,-2.18,-3.1,269.85


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 77000
  custom_metrics: {}
  date: 2021-11-05_13-02-21
  done: false
  episode_len_mean: 267.84
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.6783999999999875
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 4
  episodes_total: 249
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012499999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.4007828527026707
          entropy_coeff: 0.009999999999999998
          kl: 0.007788422180566733
          policy_loss: 0.00869550286895699
          total_loss: 0.00897462773654196
          vf_explained_var: 0.15512503683567047
          vf_loss: 0.014189598730040921
    num_agent_steps_sampled: 77000
    num_agent_steps_trained: 77000
    num_steps_sampled: 77000
    num_steps_trained: 77000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,77,1827.72,77000,-2.6784,-2.18,-3.1,267.84


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 78000
  custom_metrics: {}
  date: 2021-11-05_13-02-45
  done: false
  episode_len_mean: 266.62
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.666199999999987
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 4
  episodes_total: 253
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012499999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.3989993704689874
          entropy_coeff: 0.009999999999999998
          kl: 0.006838434963721814
          policy_loss: 0.032587689575221804
          total_loss: 0.031454921431011626
          vf_explained_var: 0.20404087007045746
          vf_loss: 0.012771745212376118
    num_agent_steps_sampled: 78000
    num_agent_steps_trained: 78000
    num_steps_sampled: 78000
    num_steps_trained: 7800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,78,1852.15,78000,-2.6662,-2.18,-3.1,266.62


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 79000
  custom_metrics: {}
  date: 2021-11-05_13-03-09
  done: false
  episode_len_mean: 265.34
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.653399999999987
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 4
  episodes_total: 257
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012499999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.393469308482276
          entropy_coeff: 0.009999999999999998
          kl: 0.007771856410660997
          policy_loss: 0.015585254960589939
          total_loss: 0.015240711718797683
          vf_explained_var: 0.20334891974925995
          vf_loss: 0.013492996338754892
    num_agent_steps_sampled: 79000
    num_agent_steps_trained: 79000
    num_steps_sampled: 79000
    num_steps_trained: 79000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,79,1876.02,79000,-2.6534,-2.18,-3.1,265.34


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 80000
  custom_metrics: {}
  date: 2021-11-05_13-03-33
  done: false
  episode_len_mean: 263.81
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.6380999999999877
  episode_reward_min: -2.9699999999999807
  episodes_this_iter: 4
  episodes_total: 261
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012499999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.3290784623887804
          entropy_coeff: 0.009999999999999998
          kl: 0.006789311030184485
          policy_loss: 0.029428501923878986
          total_loss: 0.028546598967578676
          vf_explained_var: 0.20525430142879486
          vf_loss: 0.012324014326764478
    num_agent_steps_sampled: 80000
    num_agent_steps_trained: 80000
    num_steps_sampled: 80000
    num_steps_trained: 80

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,80,1899.86,80000,-2.6381,-2.18,-2.97,263.81


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 81000
  custom_metrics: {}
  date: 2021-11-05_13-03-57
  done: false
  episode_len_mean: 263.21
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.6320999999999874
  episode_reward_min: -2.859999999999983
  episodes_this_iter: 3
  episodes_total: 264
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012499999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.3485572086440192
          entropy_coeff: 0.009999999999999998
          kl: 0.00725122519856206
          policy_loss: -0.09342600554227828
          total_loss: -0.09354936646090614
          vf_explained_var: 0.13228894770145416
          vf_loss: 0.01327156958480676
    num_agent_steps_sampled: 81000
    num_agent_steps_trained: 81000
    num_steps_sampled: 81000
    num_steps_trained: 81000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,81,1923.77,81000,-2.6321,-2.18,-2.86,263.21


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 82000
  custom_metrics: {}
  date: 2021-11-05_13-04-20
  done: false
  episode_len_mean: 262.97
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.629699999999988
  episode_reward_min: -2.859999999999983
  episodes_this_iter: 4
  episodes_total: 268
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012499999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.437472520934211
          entropy_coeff: 0.009999999999999998
          kl: 0.011144011193129561
          policy_loss: 0.01084045817454656
          total_loss: 0.008237139880657196
          vf_explained_var: 0.1634008139371872
          vf_loss: 0.011632107405198945
    num_agent_steps_sampled: 82000
    num_agent_steps_trained: 82000
    num_steps_sampled: 82000
    num_steps_trained: 82000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,82,1946.8,82000,-2.6297,-2.18,-2.86,262.97




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 83000
  custom_metrics: {}
  date: 2021-11-05_13-05-00
  done: false
  episode_len_mean: 262.94
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.6293999999999875
  episode_reward_min: -2.8899999999999824
  episodes_this_iter: 4
  episodes_total: 272
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012499999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.3785139746136135
          entropy_coeff: 0.009999999999999998
          kl: 0.008019198758405663
          policy_loss: 0.014684618678357866
          total_loss: 0.01588292916615804
          vf_explained_var: 0.10866867750883102
          vf_loss: 0.014883211917347379
    num_agent_steps_sampled: 83000
    num_agent_steps_trained: 83000
    num_steps_sampled: 83000
    num_steps_trained: 830

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,83,1987.29,83000,-2.6294,-2.18,-2.89,262.94


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 84000
  custom_metrics: {}
  date: 2021-11-05_13-05-23
  done: false
  episode_len_mean: 262.96
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.6295999999999875
  episode_reward_min: -2.8899999999999824
  episodes_this_iter: 3
  episodes_total: 275
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012499999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.2800200568305122
          entropy_coeff: 0.009999999999999998
          kl: 0.013785113602712803
          policy_loss: -0.10615301860703362
          total_loss: -0.10394884455535147
          vf_explained_var: 0.10757667571306229
          vf_loss: 0.014832059438857768
    num_agent_steps_sampled: 84000
    num_agent_steps_trained: 84000
    num_steps_sampled: 84000
    num_steps_trained: 84

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,84,2010.25,84000,-2.6296,-2.18,-2.89,262.96


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 85000
  custom_metrics: {}
  date: 2021-11-05_13-05-47
  done: false
  episode_len_mean: 262.96
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.6295999999999875
  episode_reward_min: -2.8899999999999824
  episodes_this_iter: 4
  episodes_total: 279
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012499999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.20243132909139
          entropy_coeff: 0.009999999999999998
          kl: 0.019402742049280795
          policy_loss: 0.00312689741452535
          total_loss: 0.005176510123742951
          vf_explained_var: 0.18167291581630707
          vf_loss: 0.013831393296519915
    num_agent_steps_sampled: 85000
    num_agent_steps_trained: 85000
    num_steps_sampled: 85000
    num_steps_trained: 85000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,85,2033.95,85000,-2.6296,-2.18,-2.89,262.96


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 86000
  custom_metrics: {}
  date: 2021-11-05_13-06-11
  done: false
  episode_len_mean: 263.01
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.630099999999988
  episode_reward_min: -2.8899999999999824
  episodes_this_iter: 4
  episodes_total: 283
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012499999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.0879968881607056
          entropy_coeff: 0.009999999999999998
          kl: 0.010400045891416139
          policy_loss: 0.005568151010407342
          total_loss: 0.0099114747511016
          vf_explained_var: 0.13353565335273743
          vf_loss: 0.015093289564053218
    num_agent_steps_sampled: 86000
    num_agent_steps_trained: 86000
    num_steps_sampled: 86000
    num_steps_trained: 86000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,86,2057.8,86000,-2.6301,-2.18,-2.89,263.01


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 87000
  custom_metrics: {}
  date: 2021-11-05_13-06-36
  done: false
  episode_len_mean: 262.36
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.623599999999988
  episode_reward_min: -2.8899999999999824
  episodes_this_iter: 4
  episodes_total: 287
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012499999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.0629588041040632
          entropy_coeff: 0.009999999999999998
          kl: 0.00530213364095052
          policy_loss: 0.00905379495686955
          total_loss: 0.013800607621669769
          vf_explained_var: 0.10419560223817825
          vf_loss: 0.01531012508397301
    num_agent_steps_sampled: 87000
    num_agent_steps_trained: 87000
    num_steps_sampled: 87000
    num_steps_trained: 87000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,87,2082.43,87000,-2.6236,-2.18,-2.89,262.36


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 88000
  custom_metrics: {}
  date: 2021-11-05_13-07-01
  done: false
  episode_len_mean: 261.22
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.6121999999999885
  episode_reward_min: -2.8899999999999824
  episodes_this_iter: 4
  episodes_total: 291
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012499999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 0.9488087488545311
          entropy_coeff: 0.009999999999999998
          kl: 0.006097631388360626
          policy_loss: -0.015207787851492564
          total_loss: -0.009030511975288391
          vf_explained_var: 0.08946210891008377
          vf_loss: 0.015589143387559387
    num_agent_steps_sampled: 88000
    num_agent_steps_trained: 88000
    num_steps_sampled: 88000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,88,2107.58,88000,-2.6122,-2.18,-2.89,261.22


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 89000
  custom_metrics: {}
  date: 2021-11-05_13-07-27
  done: false
  episode_len_mean: 259.73
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.5972999999999886
  episode_reward_min: -2.8899999999999824
  episodes_this_iter: 5
  episodes_total: 296
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012499999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.0070023344622718
          entropy_coeff: 0.009999999999999998
          kl: 0.0048114194974940875
          policy_loss: -0.02423996047841178
          total_loss: -0.0153647367325094
          vf_explained_var: 0.11414328962564468
          vf_loss: 0.018885103416525655
    num_agent_steps_sampled: 89000
    num_agent_steps_trained: 89000
    num_steps_sampled: 89000
    num_steps_trained: 89

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,89,2133.59,89000,-2.5973,-2.18,-2.89,259.73




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 90000
  custom_metrics: {}
  date: 2021-11-05_13-08-09
  done: false
  episode_len_mean: 258.88
  episode_media: {}
  episode_reward_max: -2.109999999999999
  episode_reward_mean: -2.5887999999999884
  episode_reward_min: -2.8899999999999824
  episodes_this_iter: 4
  episodes_total: 300
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006249999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 0.8800932652420468
          entropy_coeff: 0.009999999999999998
          kl: 0.012326106562756737
          policy_loss: 0.006189785318242179
          total_loss: 0.01189555095301734
          vf_explained_var: 0.09534216672182083
          vf_loss: 0.014429658589263757
    num_agent_steps_sampled: 90000
    num_agent_steps_trained: 90000
    num_steps_sampled: 90000
    num_steps_trained: 9000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,90,2175.31,90000,-2.5888,-2.11,-2.89,258.88


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 91000
  custom_metrics: {}
  date: 2021-11-05_13-08-38
  done: false
  episode_len_mean: 258.2
  episode_media: {}
  episode_reward_max: -2.109999999999999
  episode_reward_mean: -2.5819999999999883
  episode_reward_min: -2.8899999999999824
  episodes_this_iter: 4
  episodes_total: 304
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006249999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 0.8556059678395589
          entropy_coeff: 0.009999999999999998
          kl: 0.005842167287571263
          policy_loss: 0.03823610999517971
          total_loss: 0.04273944008681509
          vf_explained_var: 0.07834780961275101
          vf_loss: 0.013022876758542325
    num_agent_steps_sampled: 91000
    num_agent_steps_trained: 91000
    num_steps_sampled: 91000
    num_steps_trained: 91000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,91,2204.98,91000,-2.582,-2.11,-2.89,258.2


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 92000
  custom_metrics: {}
  date: 2021-11-05_13-09-04
  done: false
  episode_len_mean: 257.19
  episode_media: {}
  episode_reward_max: -2.109999999999999
  episode_reward_mean: -2.571899999999989
  episode_reward_min: -2.8899999999999824
  episodes_this_iter: 4
  episodes_total: 308
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006249999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 0.889687716960907
          entropy_coeff: 0.009999999999999998
          kl: 0.002760336561738402
          policy_loss: -0.020338952955272462
          total_loss: -0.013826165513859855
          vf_explained_var: 0.03294483944773674
          vf_loss: 0.015392413207640251
    num_agent_steps_sampled: 92000
    num_agent_steps_trained: 92000
    num_steps_sampled: 92000
    num_steps_trained: 920

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,92,2230.69,92000,-2.5719,-2.11,-2.89,257.19


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 93000
  custom_metrics: {}
  date: 2021-11-05_13-09-30
  done: false
  episode_len_mean: 256.87
  episode_media: {}
  episode_reward_max: -2.109999999999999
  episode_reward_mean: -2.568699999999989
  episode_reward_min: -2.8899999999999824
  episodes_this_iter: 4
  episodes_total: 312
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 0.8177715241909027
          entropy_coeff: 0.009999999999999998
          kl: 0.009999399526080152
          policy_loss: -0.10603061997228198
          total_loss: -0.09554204137788878
          vf_explained_var: 0.06936633586883545
          vf_loss: 0.01863504299480054
    num_agent_steps_sampled: 93000
    num_agent_steps_trained: 93000
    num_steps_sampled: 93000
    num_steps_trained: 9300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,93,2256.36,93000,-2.5687,-2.11,-2.89,256.87


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 94000
  custom_metrics: {}
  date: 2021-11-05_13-09-55
  done: false
  episode_len_mean: 255.83
  episode_media: {}
  episode_reward_max: -2.109999999999999
  episode_reward_mean: -2.55829999999999
  episode_reward_min: -2.8899999999999824
  episodes_this_iter: 5
  episodes_total: 317
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 0.9078898290793102
          entropy_coeff: 0.009999999999999998
          kl: 0.005859156574599764
          policy_loss: 0.003579138053788079
          total_loss: 0.012300066567129559
          vf_explained_var: 0.0848047062754631
          vf_loss: 0.01778151873085234
    num_agent_steps_sampled: 94000
    num_agent_steps_trained: 94000
    num_steps_sampled: 94000
    num_steps_trained: 94000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,94,2281.94,94000,-2.5583,-2.11,-2.89,255.83


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 95000
  custom_metrics: {}
  date: 2021-11-05_13-10-21
  done: false
  episode_len_mean: 254.65
  episode_media: {}
  episode_reward_max: -2.109999999999999
  episode_reward_mean: -2.5464999999999898
  episode_reward_min: -2.8899999999999824
  episodes_this_iter: 4
  episodes_total: 321
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 0.8740407036410438
          entropy_coeff: 0.009999999999999998
          kl: 0.005032776090488274
          policy_loss: 0.04103387751513057
          total_loss: 0.045463927255736454
          vf_explained_var: 0.09610918164253235
          vf_loss: 0.013154731380442779
    num_agent_steps_sampled: 95000
    num_agent_steps_trained: 95000
    num_steps_sampled: 95000
    num_steps_trained: 950

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,95,2307.62,95000,-2.5465,-2.11,-2.89,254.65


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 96000
  custom_metrics: {}
  date: 2021-11-05_13-10-46
  done: false
  episode_len_mean: 253.62
  episode_media: {}
  episode_reward_max: -2.109999999999999
  episode_reward_mean: -2.53619999999999
  episode_reward_min: -2.8899999999999824
  episodes_this_iter: 4
  episodes_total: 325
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 0.9621061861515046
          entropy_coeff: 0.009999999999999998
          kl: 0.007841880781138538
          policy_loss: 0.01567800450656149
          total_loss: 0.021255616015858122
          vf_explained_var: 0.12741056084632874
          vf_loss: 0.015174165937221713
    num_agent_steps_sampled: 96000
    num_agent_steps_trained: 96000
    num_steps_sampled: 96000
    num_steps_trained: 96000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,96,2333,96000,-2.5362,-2.11,-2.89,253.62


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 97000
  custom_metrics: {}
  date: 2021-11-05_13-11-12
  done: false
  episode_len_mean: 252.46
  episode_media: {}
  episode_reward_max: -2.109999999999999
  episode_reward_mean: -2.52459999999999
  episode_reward_min: -2.8899999999999824
  episodes_this_iter: 4
  episodes_total: 329
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.0196179270744323
          entropy_coeff: 0.009999999999999998
          kl: 0.005493016436398948
          policy_loss: 0.035272702740298374
          total_loss: 0.03797172788116667
          vf_explained_var: 0.17322443425655365
          vf_loss: 0.01287803959308399
    num_agent_steps_sampled: 97000
    num_agent_steps_trained: 97000
    num_steps_sampled: 97000
    num_steps_trained: 97000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,97,2358.24,97000,-2.5246,-2.11,-2.89,252.46




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 98000
  custom_metrics: {}
  date: 2021-11-05_13-11-56
  done: false
  episode_len_mean: 250.91
  episode_media: {}
  episode_reward_max: -2.109999999999999
  episode_reward_mean: -2.5090999999999903
  episode_reward_min: -2.8899999999999824
  episodes_this_iter: 4
  episodes_total: 333
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 0.9181881606578827
          entropy_coeff: 0.009999999999999998
          kl: 0.005196675550051251
          policy_loss: -0.008099054255419307
          total_loss: -0.0030597654067807726
          vf_explained_var: 0.12711970508098602
          vf_loss: 0.01420493358746171
    num_agent_steps_sampled: 98000
    num_agent_steps_trained: 98000
    num_steps_sampled: 98000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,98,2402.53,98000,-2.5091,-2.11,-2.89,250.91


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 99000
  custom_metrics: {}
  date: 2021-11-05_13-12-22
  done: false
  episode_len_mean: 249.32
  episode_media: {}
  episode_reward_max: -2.109999999999999
  episode_reward_mean: -2.4931999999999905
  episode_reward_min: -2.8899999999999824
  episodes_this_iter: 5
  episodes_total: 338
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 0.9297860026359558
          entropy_coeff: 0.009999999999999998
          kl: 0.006833628999368102
          policy_loss: -0.025692771954668892
          total_loss: -0.01699707723326153
          vf_explained_var: 0.16295844316482544
          vf_loss: 0.01797220167807407
    num_agent_steps_sampled: 99000
    num_agent_steps_trained: 99000
    num_steps_sampled: 99000
    num_steps_trained: 99

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,99,2428.29,99000,-2.4932,-2.11,-2.89,249.32


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 100000
  custom_metrics: {}
  date: 2021-11-05_13-12-47
  done: false
  episode_len_mean: 248.74
  episode_media: {}
  episode_reward_max: -2.109999999999999
  episode_reward_mean: -2.487399999999991
  episode_reward_min: -2.8899999999999824
  episodes_this_iter: 4
  episodes_total: 342
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 1.0412489983770583
          entropy_coeff: 0.009999999999999998
          kl: 0.0055149393096140245
          policy_loss: 0.0009288147919707828
          total_loss: 0.0036298626826869115
          vf_explained_var: 0.21810592710971832
          vf_loss: 0.013096302075104581
    num_agent_steps_sampled: 100000
    num_agent_steps_trained: 100000
    num_steps_sampled: 100000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,100,2453.31,100000,-2.4874,-2.11,-2.89,248.74


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 101000
  custom_metrics: {}
  date: 2021-11-05_13-13-12
  done: false
  episode_len_mean: 248.3
  episode_media: {}
  episode_reward_max: -2.109999999999999
  episode_reward_mean: -2.4829999999999908
  episode_reward_min: -2.8899999999999824
  episodes_this_iter: 4
  episodes_total: 346
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 0.9904320895671844
          entropy_coeff: 0.009999999999999998
          kl: 0.00826379960717495
          policy_loss: 0.02600270642174615
          total_loss: 0.02758606606059604
          vf_explained_var: 0.22917841374874115
          vf_loss: 0.011461857763222522
    num_agent_steps_sampled: 101000
    num_agent_steps_trained: 101000
    num_steps_sampled: 101000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,101,2478.57,101000,-2.483,-2.11,-2.89,248.3


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 102000
  custom_metrics: {}
  date: 2021-11-05_13-13-38
  done: false
  episode_len_mean: 247.94
  episode_media: {}
  episode_reward_max: -2.109999999999999
  episode_reward_mean: -2.4793999999999907
  episode_reward_min: -2.8899999999999824
  episodes_this_iter: 4
  episodes_total: 350
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 0.924974219666587
          entropy_coeff: 0.009999999999999998
          kl: 0.003598874549808981
          policy_loss: 0.02562766575978862
          total_loss: 0.030483713953031434
          vf_explained_var: 0.16554948687553406
          vf_loss: 0.01409454345703125
    num_agent_steps_sampled: 102000
    num_agent_steps_trained: 102000
    num_steps_sampled: 102000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,102,2504.17,102000,-2.4794,-2.11,-2.89,247.94


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 103000
  custom_metrics: {}
  date: 2021-11-05_13-14-03
  done: false
  episode_len_mean: 247.49
  episode_media: {}
  episode_reward_max: -2.109999999999999
  episode_reward_mean: -2.4748999999999906
  episode_reward_min: -2.8899999999999824
  episodes_this_iter: 4
  episodes_total: 354
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015624999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 0.9005516820483738
          entropy_coeff: 0.009999999999999998
          kl: 0.005696572702986518
          policy_loss: 0.024320868775248528
          total_loss: 0.02993998233642843
          vf_explained_var: 0.11501085758209229
          vf_loss: 0.014615731075819996
    num_agent_steps_sampled: 103000
    num_agent_steps_trained: 103000
    num_steps_sampled: 103000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,103,2529.12,103000,-2.4749,-2.11,-2.89,247.49


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 104000
  custom_metrics: {}
  date: 2021-11-05_13-14-29
  done: false
  episode_len_mean: 246.7
  episode_media: {}
  episode_reward_max: -2.109999999999999
  episode_reward_mean: -2.466999999999991
  episode_reward_min: -2.8899999999999824
  episodes_this_iter: 4
  episodes_total: 358
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015624999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 0.8406361202398936
          entropy_coeff: 0.009999999999999998
          kl: 0.003680405399867305
          policy_loss: -0.010564460315638118
          total_loss: -0.004283385848005613
          vf_explained_var: 0.09683408588171005
          vf_loss: 0.014681683304823107
    num_agent_steps_sampled: 104000
    num_agent_steps_trained: 104000
    num_steps_sampled: 104000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,104,2555.17,104000,-2.467,-2.11,-2.89,246.7




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 105000
  custom_metrics: {}
  date: 2021-11-05_13-15-12
  done: false
  episode_len_mean: 245.04
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.4503999999999917
  episode_reward_min: -2.8899999999999824
  episodes_this_iter: 5
  episodes_total: 363
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007812499999999998
          cur_lr: 5.000000000000001e-05
          entropy: 0.8376749555269877
          entropy_coeff: 0.009999999999999998
          kl: 0.0059281487038493055
          policy_loss: -0.032975833117961886
          total_loss: -0.02343098988963498
          vf_explained_var: 0.14820817112922668
          vf_loss: 0.017916961345407698
    num_agent_steps_sampled: 105000
    num_agent_steps_trained: 105000
    num_steps_sampled: 105000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,105,2598.49,105000,-2.4504,-2.09,-2.89,245.04


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 106000
  custom_metrics: {}
  date: 2021-11-05_13-15-40
  done: false
  episode_len_mean: 243.69
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.436899999999992
  episode_reward_min: -2.8899999999999824
  episodes_this_iter: 4
  episodes_total: 367
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007812499999999998
          cur_lr: 5.000000000000001e-05
          entropy: 0.8353540851010217
          entropy_coeff: 0.009999999999999998
          kl: 0.010030284196269513
          policy_loss: 0.021091431337926122
          total_loss: 0.026282824824253717
          vf_explained_var: 0.17554041743278503
          vf_loss: 0.013537098943359322
    num_agent_steps_sampled: 106000
    num_agent_steps_trained: 106000
    num_steps_sampled: 106000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,106,2626.57,106000,-2.4369,-2.09,-2.89,243.69


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 107000
  custom_metrics: {}
  date: 2021-11-05_13-16-06
  done: false
  episode_len_mean: 242.65
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.426499999999992
  episode_reward_min: -2.859999999999983
  episodes_this_iter: 4
  episodes_total: 371
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007812499999999998
          cur_lr: 5.000000000000001e-05
          entropy: 0.8075298488140106
          entropy_coeff: 0.009999999999999998
          kl: 0.003691562135855659
          policy_loss: 0.0337827793839905
          total_loss: 0.03898227276901404
          vf_explained_var: 0.10257113724946976
          vf_loss: 0.013271909496850438
    num_agent_steps_sampled: 107000
    num_agent_steps_trained: 107000
    num_steps_sampled: 107000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,107,2652.12,107000,-2.4265,-2.09,-2.86,242.65


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 108000
  custom_metrics: {}
  date: 2021-11-05_13-16-32
  done: false
  episode_len_mean: 241.0
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.4099999999999926
  episode_reward_min: -2.809999999999984
  episodes_this_iter: 4
  episodes_total: 375
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003906249999999999
          cur_lr: 5.000000000000001e-05
          entropy: 0.7295100178983477
          entropy_coeff: 0.009999999999999998
          kl: 0.00281642467581062
          policy_loss: 0.01073956878648864
          total_loss: 0.017954834633403353
          vf_explained_var: 0.09417292475700378
          vf_loss: 0.014509263676073816
    num_agent_steps_sampled: 108000
    num_agent_steps_trained: 108000
    num_steps_sampled: 108000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,108,2678.02,108000,-2.41,-2.09,-2.81,241


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 109000
  custom_metrics: {}
  date: 2021-11-05_13-16-58
  done: false
  episode_len_mean: 239.69
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.396899999999993
  episode_reward_min: -2.669999999999987
  episodes_this_iter: 4
  episodes_total: 379
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00019531249999999996
          cur_lr: 5.000000000000001e-05
          entropy: 0.5863077660401662
          entropy_coeff: 0.009999999999999998
          kl: 0.004961704606147856
          policy_loss: -0.11560064777731896
          total_loss: -0.1029247565401925
          vf_explained_var: 0.0695195347070694
          vf_loss: 0.01853799747510089
    num_agent_steps_sampled: 109000
    num_agent_steps_trained: 109000
    num_steps_sampled: 109000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,109,2703.95,109000,-2.3969,-2.09,-2.67,239.69


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 110000
  custom_metrics: {}
  date: 2021-11-05_13-17-23
  done: false
  episode_len_mean: 238.56
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.385599999999993
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 5
  episodes_total: 384
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.765624999999998e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.6524579438898298
          entropy_coeff: 0.009999999999999998
          kl: 0.002654540493402793
          policy_loss: -0.0004343136317200131
          total_loss: 0.009871617952982584
          vf_explained_var: 0.0867527574300766
          vf_loss: 0.0168302488927212
    num_agent_steps_sampled: 110000
    num_agent_steps_trained: 110000
    num_steps_sampled: 110000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,110,2729.5,110000,-2.3856,-2.09,-2.57,238.56


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 111000
  custom_metrics: {}
  date: 2021-11-05_13-17-49
  done: false
  episode_len_mean: 238.02
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.380199999999993
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 4
  episodes_total: 388
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.882812499999999e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.6514344056447346
          entropy_coeff: 0.009999999999999998
          kl: 0.003966675283257606
          policy_loss: 0.02745395509733094
          total_loss: 0.034824908648928
          vf_explained_var: 0.05898308753967285
          vf_loss: 0.01388510526675317
    num_agent_steps_sampled: 111000
    num_agent_steps_trained: 111000
    num_steps_sampled: 111000
    num_steps_trained: 1110

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,111,2755.37,111000,-2.3802,-2.09,-2.57,238.02




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 112000
  custom_metrics: {}
  date: 2021-11-05_13-18-33
  done: false
  episode_len_mean: 237.34
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.373399999999993
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 4
  episodes_total: 392
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4414062499999995e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.6176887141333686
          entropy_coeff: 0.009999999999999998
          kl: 0.0023618376349190838
          policy_loss: -0.01346125052207046
          total_loss: -0.0052193488511774275
          vf_explained_var: 0.05784553289413452
          vf_loss: 0.014418734403120147
    num_agent_steps_sampled: 112000
    num_agent_steps_trained: 112000
    num_steps_sampled: 112000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,112,2799.61,112000,-2.3734,-1.99,-2.57,237.34


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 113000
  custom_metrics: {}
  date: 2021-11-05_13-18-59
  done: false
  episode_len_mean: 237.09
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.370899999999993
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 5
  episodes_total: 397
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2207031249999997e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.5364729411072201
          entropy_coeff: 0.009999999999999998
          kl: 0.01103050611287636
          policy_loss: -0.011371837473577923
          total_loss: 0.0015309953855143653
          vf_explained_var: 0.07767710089683533
          vf_loss: 0.018267431161883806
    num_agent_steps_sampled: 113000
    num_agent_steps_trained: 113000
    num_steps_sampled: 113000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,113,2825.37,113000,-2.3709,-1.99,-2.57,237.09


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 114000
  custom_metrics: {}
  date: 2021-11-05_13-19-25
  done: false
  episode_len_mean: 237.18
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.371799999999993
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 4
  episodes_total: 401
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2207031249999997e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.6084653463628557
          entropy_coeff: 0.009999999999999998
          kl: 0.004665612148444845
          policy_loss: 0.028124281225932968
          total_loss: 0.035759880931841
          vf_explained_var: 0.09283306449651718
          vf_loss: 0.013720199176006847
    num_agent_steps_sampled: 114000
    num_agent_steps_trained: 114000
    num_steps_sampled: 114000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,114,2851.56,114000,-2.3718,-1.99,-2.57,237.18


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 115000
  custom_metrics: {}
  date: 2021-11-05_13-19-52
  done: false
  episode_len_mean: 237.09
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.370899999999993
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 4
  episodes_total: 405
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.103515624999999e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.5693028304311964
          entropy_coeff: 0.009999999999999998
          kl: 0.0034528693124490435
          policy_loss: -0.025894941223992243
          total_loss: -0.01706527355644438
          vf_explained_var: 0.06456984579563141
          vf_loss: 0.014522677763468689
    num_agent_steps_sampled: 115000
    num_agent_steps_trained: 115000
    num_steps_sampled: 115000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,115,2877.83,115000,-2.3709,-1.99,-2.57,237.09


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 116000
  custom_metrics: {}
  date: 2021-11-05_13-20-18
  done: false
  episode_len_mean: 236.86
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.3685999999999936
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 5
  episodes_total: 410
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.0517578124999993e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.6070400304264493
          entropy_coeff: 0.009999999999999998
          kl: 0.03320579153228004
          policy_loss: -0.02574233180946774
          total_loss: -0.013653539617856343
          vf_explained_var: 0.07491669058799744
          vf_loss: 0.01815909557044506
    num_agent_steps_sampled: 116000
    num_agent_steps_trained: 116000
    num_steps_sampled: 116000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,116,2903.77,116000,-2.3686,-1.99,-2.57,236.86


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 117000
  custom_metrics: {}
  date: 2021-11-05_13-20-43
  done: false
  episode_len_mean: 236.92
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.369199999999993
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 4
  episodes_total: 414
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.57763671875e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8531733095645905
          entropy_coeff: 0.009999999999999998
          kl: 0.023281183643260272
          policy_loss: 0.017197853492365942
          total_loss: 0.021619229349825117
          vf_explained_var: 0.1538727581501007
          vf_loss: 0.012953007479922639
    num_agent_steps_sampled: 117000
    num_agent_steps_trained: 117000
    num_steps_sampled: 117000
    num_steps_trained: 1170

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,117,2929.29,117000,-2.3692,-1.99,-2.57,236.92


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 118000
  custom_metrics: {}
  date: 2021-11-05_13-21-09
  done: false
  episode_len_mean: 237.52
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.375199999999993
  episode_reward_min: -2.579999999999989
  episodes_this_iter: 4
  episodes_total: 418
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.866455078125003e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.004030798541175
          entropy_coeff: 0.009999999999999998
          kl: 0.011464700069693497
          policy_loss: 0.0007140966753164927
          total_loss: 0.004594864365127352
          vf_explained_var: 0.21651875972747803
          vf_loss: 0.013920997321191762
    num_agent_steps_sampled: 118000
    num_agent_steps_trained: 118000
    num_steps_sampled: 118000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,118,2954.54,118000,-2.3752,-1.99,-2.58,237.52




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 119000
  custom_metrics: {}
  date: 2021-11-05_13-21-51
  done: false
  episode_len_mean: 238.05
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.3804999999999934
  episode_reward_min: -2.819999999999984
  episodes_this_iter: 4
  episodes_total: 422
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.866455078125003e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.091212452120251
          entropy_coeff: 0.009999999999999998
          kl: 0.011893486417361349
          policy_loss: 0.036440736552079515
          total_loss: 0.03646956764989429
          vf_explained_var: 0.34993234276771545
          vf_loss: 0.010940879911908672
    num_agent_steps_sampled: 119000
    num_agent_steps_trained: 119000
    num_steps_sampled: 119000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,119,2997.41,119000,-2.3805,-1.99,-2.82,238.05


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 120000
  custom_metrics: {}
  date: 2021-11-05_13-22-16
  done: false
  episode_len_mean: 238.45
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.384499999999993
  episode_reward_min: -2.819999999999984
  episodes_this_iter: 4
  episodes_total: 426
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.866455078125003e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.9798281715975867
          entropy_coeff: 0.009999999999999998
          kl: 0.006461782948205218
          policy_loss: -0.010697604384687212
          total_loss: -0.008183286835749945
          vf_explained_var: 0.3606413006782532
          vf_loss: 0.012312563436312807
    num_agent_steps_sampled: 120000
    num_agent_steps_trained: 120000
    num_steps_sampled: 120000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,120,3022.15,120000,-2.3845,-1.99,-2.82,238.45


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 121000
  custom_metrics: {}
  date: 2021-11-05_13-22-40
  done: false
  episode_len_mean: 239.21
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.392099999999993
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 3
  episodes_total: 429
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.866455078125003e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.9002455512682597
          entropy_coeff: 0.009999999999999998
          kl: 0.008025558773411648
          policy_loss: -0.011842936153213184
          total_loss: -0.011240605057941543
          vf_explained_var: 0.354497492313385
          vf_loss: 0.009604737413529721
    num_agent_steps_sampled: 121000
    num_agent_steps_trained: 121000
    num_steps_sampled: 121000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,121,3046.06,121000,-2.3921,-1.99,-2.85,239.21


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 122000
  custom_metrics: {}
  date: 2021-11-05_13-23-05
  done: false
  episode_len_mean: 239.95
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.3994999999999926
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 433
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.866455078125003e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8028756572140587
          entropy_coeff: 0.009999999999999998
          kl: 0.004414626964711487
          policy_loss: -0.00892960247066286
          total_loss: -0.004356576585107379
          vf_explained_var: 0.2214880883693695
          vf_loss: 0.01260175746348169
    num_agent_steps_sampled: 122000
    num_agent_steps_trained: 122000
    num_steps_sampled: 122000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,122,3070.52,122000,-2.3995,-1.99,-2.85,239.95


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 123000
  custom_metrics: {}
  date: 2021-11-05_13-23-29
  done: false
  episode_len_mean: 240.56
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.405599999999992
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 437
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4332275390625014e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.6738092064857483
          entropy_coeff: 0.009999999999999998
          kl: 0.006358625318773173
          policy_loss: -0.04802916182412042
          total_loss: -0.041869824959172144
          vf_explained_var: 0.15534527599811554
          vf_loss: 0.012897414062172175
    num_agent_steps_sampled: 123000
    num_agent_steps_trained: 123000
    num_steps_sampled: 123000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,123,3095.04,123000,-2.4056,-1.99,-2.85,240.56


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 124000
  custom_metrics: {}
  date: 2021-11-05_13-23-55
  done: false
  episode_len_mean: 240.66
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.4065999999999925
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 441
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4332275390625014e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.6255942540036308
          entropy_coeff: 0.009999999999999998
          kl: 0.005707045785312124
          policy_loss: -0.027824381904469595
          total_loss: -0.021159945014450284
          vf_explained_var: 0.17510002851486206
          vf_loss: 0.01292036489273111
    num_agent_steps_sampled: 124000
    num_agent_steps_trained: 124000
    num_steps_sampled: 124000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,124,3120.35,124000,-2.4066,-1.99,-2.85,240.66


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 125000
  custom_metrics: {}
  date: 2021-11-05_13-24-20
  done: false
  episode_len_mean: 240.8
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.407999999999993
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 445
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4332275390625014e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7279574950536092
          entropy_coeff: 0.009999999999999998
          kl: 0.006845735633264619
          policy_loss: -0.11283549434608883
          total_loss: -0.10414387087027231
          vf_explained_var: 0.17733070254325867
          vf_loss: 0.01597117923407091
    num_agent_steps_sampled: 125000
    num_agent_steps_trained: 125000
    num_steps_sampled: 125000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,125,3145.71,125000,-2.408,-1.99,-2.85,240.8




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 126000
  custom_metrics: {}
  date: 2021-11-05_13-25-02
  done: false
  episode_len_mean: 240.69
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.4068999999999927
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 5
  episodes_total: 450
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4332275390625014e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8390616562631396
          entropy_coeff: 0.009999999999999998
          kl: 0.006519429543099204
          policy_loss: 0.002158020105626848
          total_loss: 0.006719632281197442
          vf_explained_var: 0.27518409490585327
          vf_loss: 0.012952210040142138
    num_agent_steps_sampled: 126000
    num_agent_steps_trained: 126000
    num_steps_sampled: 126000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,126,3187.37,126000,-2.4069,-1.99,-2.85,240.69


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 127000
  custom_metrics: {}
  date: 2021-11-05_13-25-29
  done: false
  episode_len_mean: 240.68
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.406799999999992
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 454
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4332275390625014e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7004637678464254
          entropy_coeff: 0.009999999999999998
          kl: 0.0047521217456629
          policy_loss: 0.048916729456848565
          total_loss: 0.05189434289932251
          vf_explained_var: 0.2750064730644226
          vf_loss: 0.009982238153720067
    num_agent_steps_sampled: 127000
    num_agent_steps_trained: 127000
    num_steps_sampled: 127000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,127,3214.45,127000,-2.4068,-1.99,-2.85,240.68


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 128000
  custom_metrics: {}
  date: 2021-11-05_13-25-55
  done: false
  episode_len_mean: 240.81
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.4080999999999926
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 458
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7166137695312507e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.604666202598148
          entropy_coeff: 0.009999999999999998
          kl: 0.0038005895552836694
          policy_loss: 0.033719791720310845
          total_loss: 0.041278527677059175
          vf_explained_var: 0.14536069333553314
          vf_loss: 0.013605394659356939
    num_agent_steps_sampled: 128000
    num_agent_steps_trained: 128000
    num_steps_sampled: 128000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,128,3240.26,128000,-2.4081,-1.99,-2.85,240.81


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 129000
  custom_metrics: {}
  date: 2021-11-05_13-26-21
  done: false
  episode_len_mean: 241.0
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.4099999999999926
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 462
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.583068847656253e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.5583855566051271
          entropy_coeff: 0.009999999999999998
          kl: 0.04775010853720355
          policy_loss: 0.03722244948148727
          total_loss: 0.04465984089506997
          vf_explained_var: 0.07552900910377502
          vf_loss: 0.01302120897711979
    num_agent_steps_sampled: 129000
    num_agent_steps_trained: 129000
    num_steps_sampled: 129000
    num_steps_trained: 129

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,129,3266.31,129000,-2.41,-1.99,-2.85,241


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 130000
  custom_metrics: {}
  date: 2021-11-05_13-26-48
  done: false
  episode_len_mean: 240.67
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.4066999999999923
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 5
  episodes_total: 467
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2874603271484372e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8305200636386871
          entropy_coeff: 0.009999999999999998
          kl: 0.03746487398440473
          policy_loss: -0.024780902845991982
          total_loss: -0.01175224201546775
          vf_explained_var: 0.2913188934326172
          vf_loss: 0.021333816637181573
    num_agent_steps_sampled: 130000
    num_agent_steps_trained: 130000
    num_steps_sampled: 130000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,130,3294.01,130000,-2.4067,-1.99,-2.85,240.67


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 131000
  custom_metrics: {}
  date: 2021-11-05_13-27-14
  done: false
  episode_len_mean: 240.5
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.4049999999999927
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 471
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.931190490722656e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.0026224156220753
          entropy_coeff: 0.009999999999999998
          kl: 0.044225134564871066
          policy_loss: 0.012725172688563664
          total_loss: 0.016442466692792045
          vf_explained_var: 0.3766723573207855
          vf_loss: 0.013743435425890817
    num_agent_steps_sampled: 131000
    num_agent_steps_trained: 131000
    num_steps_sampled: 131000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,131,3320.07,131000,-2.405,-1.99,-2.85,240.5


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 132000
  custom_metrics: {}
  date: 2021-11-05_13-27-41
  done: false
  episode_len_mean: 241.07
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.4106999999999927
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 475
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8967857360839857e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8343577404816945
          entropy_coeff: 0.009999999999999998
          kl: 0.014092133486613515
          policy_loss: 0.02862207723988427
          total_loss: 0.027803727984428407
          vf_explained_var: 0.7430611252784729
          vf_loss: 0.007525191851891577
    num_agent_steps_sampled: 132000
    num_agent_steps_trained: 132000
    num_steps_sampled: 132000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,132,3346.18,132000,-2.4107,-1.99,-2.85,241.07


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 133000
  custom_metrics: {}
  date: 2021-11-05_13-28-06
  done: false
  episode_len_mean: 241.71
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.417099999999992
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 479
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8967857360839857e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8810897668202718
          entropy_coeff: 0.009999999999999998
          kl: 0.014338105711542681
          policy_loss: -0.040286571346223356
          total_loss: -0.044290331689019996
          vf_explained_var: 0.8958851099014282
          vf_loss: 0.00480709550384846
    num_agent_steps_sampled: 133000
    num_agent_steps_trained: 133000
    num_steps_sampled: 133000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,133,3371.72,133000,-2.4171,-1.99,-2.85,241.71




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 134000
  custom_metrics: {}
  date: 2021-11-05_13-28-51
  done: false
  episode_len_mean: 241.78
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.417799999999992
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 483
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8967857360839857e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8094776981406742
          entropy_coeff: 0.009999999999999998
          kl: 0.051670377489014464
          policy_loss: -0.010986377050479253
          total_loss: -0.013063699669308133
          vf_explained_var: 0.8652576208114624
          vf_loss: 0.006017308658920228
    num_agent_steps_sampled: 134000
    num_agent_steps_trained: 134000
    num_steps_sampled: 134000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,134,3416.29,134000,-2.4178,-1.99,-2.85,241.78


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 135000
  custom_metrics: {}
  date: 2021-11-05_13-29-13
  done: false
  episode_len_mean: 243.01
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.4300999999999915
  episode_reward_min: -2.909999999999982
  episodes_this_iter: 3
  episodes_total: 486
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.345178604125975e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.000928792026308
          entropy_coeff: 0.009999999999999998
          kl: 0.011886107354600502
          policy_loss: -0.12709013687239754
          total_loss: -0.1290982177688016
          vf_explained_var: 0.7095783948898315
          vf_loss: 0.008001158267466559
    num_agent_steps_sampled: 135000
    num_agent_steps_trained: 135000
    num_steps_sampled: 135000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,135,3438.58,135000,-2.4301,-1.99,-2.91,243.01


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 136000
  custom_metrics: {}
  date: 2021-11-05_13-29-35
  done: false
  episode_len_mean: 245.53
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.4552999999999914
  episode_reward_min: -3.1099999999999777
  episodes_this_iter: 4
  episodes_total: 490
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.345178604125975e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.9165696965323554
          entropy_coeff: 0.009999999999999998
          kl: 0.014144986745672048
          policy_loss: 0.03261263204945458
          total_loss: 0.031079990333980984
          vf_explained_var: 0.5079397559165955
          vf_loss: 0.007633000260425939
    num_agent_steps_sampled: 136000
    num_agent_steps_trained: 136000
    num_steps_sampled: 136000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,136,3460.74,136000,-2.4553,-2.17,-3.11,245.53


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 137000
  custom_metrics: {}
  date: 2021-11-05_13-29-58
  done: false
  episode_len_mean: 247.4
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.473999999999991
  episode_reward_min: -3.1099999999999777
  episodes_this_iter: 4
  episodes_total: 494
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.345178604125975e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.9081091900666555
          entropy_coeff: 0.009999999999999998
          kl: 0.010464733762017413
          policy_loss: -0.028490339633491304
          total_loss: -0.02842297065589163
          vf_explained_var: 0.2992006838321686
          vf_loss: 0.00914841756845514
    num_agent_steps_sampled: 137000
    num_agent_steps_trained: 137000
    num_steps_sampled: 137000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,137,3483.6,137000,-2.474,-2.17,-3.11,247.4


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 138000
  custom_metrics: {}
  date: 2021-11-05_13-30-21
  done: false
  episode_len_mean: 248.75
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.4874999999999905
  episode_reward_min: -3.1099999999999777
  episodes_this_iter: 3
  episodes_total: 497
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.345178604125975e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.9516097459528181
          entropy_coeff: 0.009999999999999998
          kl: 0.005873675781331005
          policy_loss: 0.03880852113167445
          total_loss: 0.03590427471531762
          vf_explained_var: 0.2830331325531006
          vf_loss: 0.006611833558417856
    num_agent_steps_sampled: 138000
    num_agent_steps_trained: 138000
    num_steps_sampled: 138000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,138,3506.12,138000,-2.4875,-2.17,-3.11,248.75


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 139000
  custom_metrics: {}
  date: 2021-11-05_13-30-43
  done: false
  episode_len_mean: 250.68
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.50679999999999
  episode_reward_min: -3.1099999999999777
  episodes_this_iter: 4
  episodes_total: 501
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.345178604125975e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8958989587095049
          entropy_coeff: 0.009999999999999998
          kl: 0.005112909978228547
          policy_loss: 0.024639444053173067
          total_loss: 0.025720892101526262
          vf_explained_var: 0.13827188313007355
          vf_loss: 0.010040422239237361
    num_agent_steps_sampled: 139000
    num_agent_steps_trained: 139000
    num_steps_sampled: 139000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,139,3528.63,139000,-2.5068,-2.17,-3.11,250.68


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 140000
  custom_metrics: {}
  date: 2021-11-05_13-31-07
  done: false
  episode_len_mean: 251.66
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.5165999999999897
  episode_reward_min: -3.1099999999999777
  episodes_this_iter: 3
  episodes_total: 504
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.345178604125975e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7632269647386339
          entropy_coeff: 0.009999999999999998
          kl: 0.01045301980330014
          policy_loss: -0.09369845439990361
          total_loss: -0.09063559737470415
          vf_explained_var: 0.11660696566104889
          vf_loss: 0.010695087557865513
    num_agent_steps_sampled: 140000
    num_agent_steps_trained: 140000
    num_steps_sampled: 140000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,140,3552.6,140000,-2.5166,-2.17,-3.11,251.66


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 141000
  custom_metrics: {}
  date: 2021-11-05_13-31-32
  done: false
  episode_len_mean: 252.56
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.52559999999999
  episode_reward_min: -3.1099999999999777
  episodes_this_iter: 4
  episodes_total: 508
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.345178604125975e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7310293469164106
          entropy_coeff: 0.009999999999999998
          kl: 0.005120464823256694
          policy_loss: -0.07960613059500853
          total_loss: -0.07362182446651988
          vf_explained_var: 0.10501434653997421
          vf_loss: 0.013294579626785385
    num_agent_steps_sampled: 141000
    num_agent_steps_trained: 141000
    num_steps_sampled: 141000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,141,3577.17,141000,-2.5256,-2.17,-3.11,252.56




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 142000
  custom_metrics: {}
  date: 2021-11-05_13-32-14
  done: false
  episode_len_mean: 253.01
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.53009999999999
  episode_reward_min: -3.1099999999999777
  episodes_this_iter: 5
  episodes_total: 513
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.345178604125975e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7027547034952376
          entropy_coeff: 0.009999999999999998
          kl: 0.00395930172180492
          policy_loss: -0.01828429616159863
          total_loss: -0.010148035403754975
          vf_explained_var: 0.12985539436340332
          vf_loss: 0.015163796653764116
    num_agent_steps_sampled: 142000
    num_agent_steps_trained: 142000
    num_steps_sampled: 142000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,142,3619.39,142000,-2.5301,-2.17,-3.11,253.01


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 143000
  custom_metrics: {}
  date: 2021-11-05_13-32-39
  done: false
  episode_len_mean: 253.04
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.53039999999999
  episode_reward_min: -3.1099999999999777
  episodes_this_iter: 4
  episodes_total: 517
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1725893020629876e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.6252522091070811
          entropy_coeff: 0.009999999999999998
          kl: 0.006204993426415445
          policy_loss: -0.010263527598645952
          total_loss: -0.005736287269327376
          vf_explained_var: 0.17244769632816315
          vf_loss: 0.010779753855119149
    num_agent_steps_sampled: 143000
    num_agent_steps_trained: 143000
    num_steps_sampled: 143000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,143,3644.04,143000,-2.5304,-2.17,-3.11,253.04


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 144000
  custom_metrics: {}
  date: 2021-11-05_13-33-04
  done: false
  episode_len_mean: 252.49
  episode_media: {}
  episode_reward_max: -2.199999999999997
  episode_reward_mean: -2.52489999999999
  episode_reward_min: -3.1099999999999777
  episodes_this_iter: 4
  episodes_total: 521
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1725893020629876e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.5377734088235431
          entropy_coeff: 0.009999999999999998
          kl: 0.0028200865058906287
          policy_loss: 0.025847482515705955
          total_loss: 0.032147914833492705
          vf_explained_var: 0.09974610805511475
          vf_loss: 0.011678164193613661
    num_agent_steps_sampled: 144000
    num_agent_steps_trained: 144000
    num_steps_sampled: 144000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,144,3669.21,144000,-2.5249,-2.2,-3.11,252.49


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 145000
  custom_metrics: {}
  date: 2021-11-05_13-33-30
  done: false
  episode_len_mean: 251.95
  episode_media: {}
  episode_reward_max: -2.199999999999997
  episode_reward_mean: -2.51949999999999
  episode_reward_min: -3.1099999999999777
  episodes_this_iter: 4
  episodes_total: 525
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0862946510314938e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.5128382636441124
          entropy_coeff: 0.009999999999999998
          kl: 0.00441597489166341
          policy_loss: 0.03716992061171267
          total_loss: 0.04381225833462344
          vf_explained_var: 0.06695253401994705
          vf_loss: 0.011770720376322668
    num_agent_steps_sampled: 145000
    num_agent_steps_trained: 145000
    num_steps_sampled: 145000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,145,3694.84,145000,-2.5195,-2.2,-3.11,251.95


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 146000
  custom_metrics: {}
  date: 2021-11-05_13-33-56
  done: false
  episode_len_mean: 250.51
  episode_media: {}
  episode_reward_max: -2.199999999999997
  episode_reward_mean: -2.5050999999999903
  episode_reward_min: -3.1099999999999777
  episodes_this_iter: 4
  episodes_total: 529
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.431473255157469e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.46529025038083394
          entropy_coeff: 0.009999999999999998
          kl: 0.0027039343473398325
          policy_loss: -0.0002497670551141103
          total_loss: 0.00763524216082361
          vf_explained_var: 0.0764065608382225
          vf_loss: 0.012537910665074984
    num_agent_steps_sampled: 146000
    num_agent_steps_trained: 146000
    num_steps_sampled: 146000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,146,3720.77,146000,-2.5051,-2.2,-3.11,250.51


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 147000
  custom_metrics: {}
  date: 2021-11-05_13-34-21
  done: false
  episode_len_mean: 249.73
  episode_media: {}
  episode_reward_max: -2.199999999999997
  episode_reward_mean: -2.4972999999999903
  episode_reward_min: -3.1099999999999777
  episodes_this_iter: 4
  episodes_total: 533
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7157366275787345e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.46267191999488405
          entropy_coeff: 0.009999999999999998
          kl: 0.0016436000005635757
          policy_loss: -0.11716885657774077
          total_loss: -0.105118564185169
          vf_explained_var: 0.06899836659431458
          vf_loss: 0.01667701125972801
    num_agent_steps_sampled: 147000
    num_agent_steps_trained: 147000
    num_steps_sampled: 147000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,147,3746.27,147000,-2.4973,-2.2,-3.11,249.73


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 148000
  custom_metrics: {}
  date: 2021-11-05_13-34-46
  done: false
  episode_len_mean: 249.1
  episode_media: {}
  episode_reward_max: -2.199999999999997
  episode_reward_mean: -2.4909999999999903
  episode_reward_min: -3.1099999999999777
  episodes_this_iter: 5
  episodes_total: 538
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3578683137893673e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.5573488775226805
          entropy_coeff: 0.009999999999999998
          kl: 0.003943743976357281
          policy_loss: -0.0007509758902920617
          total_loss: 0.010269719858964283
          vf_explained_var: 0.07970765978097916
          vf_loss: 0.01659418415899078
    num_agent_steps_sampled: 148000
    num_agent_steps_trained: 148000
    num_steps_sampled: 148000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,148,3771.32,148000,-2.491,-2.2,-3.11,249.1




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 149000
  custom_metrics: {}
  date: 2021-11-05_13-35-28
  done: false
  episode_len_mean: 248.86
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.4885999999999906
  episode_reward_min: -3.1099999999999777
  episodes_this_iter: 4
  episodes_total: 542
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.789341568946836e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.629904325803121
          entropy_coeff: 0.009999999999999998
          kl: 0.004847495422231103
          policy_loss: 0.059552335739135744
          total_loss: 0.06407799381348822
          vf_explained_var: 0.046299394220113754
          vf_loss: 0.01082469890308049
    num_agent_steps_sampled: 149000
    num_agent_steps_trained: 149000
    num_steps_sampled: 149000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,149,3813.56,149000,-2.4886,-2.08,-3.11,248.86


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 150000
  custom_metrics: {}
  date: 2021-11-05_13-35-53
  done: false
  episode_len_mean: 249.0
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.4899999999999904
  episode_reward_min: -3.1099999999999777
  episodes_this_iter: 4
  episodes_total: 546
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.394670784473418e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6908807072374555
          entropy_coeff: 0.009999999999999998
          kl: 0.006489469896338246
          policy_loss: 0.03327915651930703
          total_loss: 0.03873543788989385
          vf_explained_var: 0.030860411003232002
          vf_loss: 0.012365087980611457
    num_agent_steps_sampled: 150000
    num_agent_steps_trained: 150000
    num_steps_sampled: 150000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,150,3838.36,150000,-2.49,-2.08,-3.11,249


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 151000
  custom_metrics: {}
  date: 2021-11-05_13-36-17
  done: false
  episode_len_mean: 249.58
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.4957999999999902
  episode_reward_min: -3.1099999999999777
  episodes_this_iter: 4
  episodes_total: 550
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.394670784473418e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.7762875848346287
          entropy_coeff: 0.009999999999999998
          kl: 0.003813806457169891
          policy_loss: 0.01665878113773134
          total_loss: 0.02268304145998425
          vf_explained_var: 0.041668280959129333
          vf_loss: 0.013787136402808958
    num_agent_steps_sampled: 151000
    num_agent_steps_trained: 151000
    num_steps_sampled: 151000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,151,3862.24,151000,-2.4958,-2.08,-3.11,249.58


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 152000
  custom_metrics: {}
  date: 2021-11-05_13-36-42
  done: false
  episode_len_mean: 249.84
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.4983999999999904
  episode_reward_min: -3.1099999999999777
  episodes_this_iter: 4
  episodes_total: 554
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.697335392236709e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.7185017353958554
          entropy_coeff: 0.009999999999999998
          kl: 0.0033420042531097415
          policy_loss: 0.02670677271154192
          total_loss: 0.03358034201794201
          vf_explained_var: 0.03374442085623741
          vf_loss: 0.014058588838411702
    num_agent_steps_sampled: 152000
    num_agent_steps_trained: 152000
    num_steps_sampled: 152000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,152,3886.91,152000,-2.4984,-2.08,-3.11,249.84


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 153000
  custom_metrics: {}
  date: 2021-11-05_13-37-07
  done: false
  episode_len_mean: 249.87
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.49869999999999
  episode_reward_min: -3.1099999999999777
  episodes_this_iter: 4
  episodes_total: 558
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.486676961183545e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.699403272734748
          entropy_coeff: 0.009999999999999998
          kl: 0.010784368584574287
          policy_loss: 0.013886453128523296
          total_loss: 0.022006265074014663
          vf_explained_var: 0.02990485355257988
          vf_loss: 0.015113846398890018
    num_agent_steps_sampled: 153000
    num_agent_steps_trained: 153000
    num_steps_sampled: 153000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,153,3911.66,153000,-2.4987,-2.08,-3.11,249.87


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 154000
  custom_metrics: {}
  date: 2021-11-05_13-37-32
  done: false
  episode_len_mean: 250.23
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.5022999999999906
  episode_reward_min: -3.1099999999999777
  episodes_this_iter: 4
  episodes_total: 562
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.486676961183545e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.6756052990754445
          entropy_coeff: 0.009999999999999998
          kl: 0.0066680171864128114
          policy_loss: 0.016899063231216538
          total_loss: 0.024834479060437946
          vf_explained_var: 0.06205087900161743
          vf_loss: 0.01469147311937478
    num_agent_steps_sampled: 154000
    num_agent_steps_trained: 154000
    num_steps_sampled: 154000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,154,3936.89,154000,-2.5023,-2.08,-3.11,250.23


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 155000
  custom_metrics: {}
  date: 2021-11-05_13-37-57
  done: false
  episode_len_mean: 250.64
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.5063999999999904
  episode_reward_min: -3.1099999999999777
  episodes_this_iter: 4
  episodes_total: 566
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.486676961183545e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.7033944567044575
          entropy_coeff: 0.009999999999999998
          kl: 0.004416709577488199
          policy_loss: 0.0012443284607595868
          total_loss: 0.00866318941116333
          vf_explained_var: 0.08111952990293503
          vf_loss: 0.014452804966519276
    num_agent_steps_sampled: 155000
    num_agent_steps_trained: 155000
    num_steps_sampled: 155000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,155,3962.49,155000,-2.5064,-2.08,-3.11,250.64




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 156000
  custom_metrics: {}
  date: 2021-11-05_13-38-41
  done: false
  episode_len_mean: 250.91
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.509099999999991
  episode_reward_min: -3.1099999999999777
  episodes_this_iter: 4
  episodes_total: 570
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.243338480591773e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.907716691493988
          entropy_coeff: 0.009999999999999998
          kl: 0.006667184066582542
          policy_loss: -0.04859165462354819
          total_loss: -0.043589783666862385
          vf_explained_var: 0.22257781028747559
          vf_loss: 0.014079037039644188
    num_agent_steps_sampled: 156000
    num_agent_steps_trained: 156000
    num_steps_sampled: 156000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,156,4006.19,156000,-2.5091,-2.08,-3.11,250.91


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 157000
  custom_metrics: {}
  date: 2021-11-05_13-39-07
  done: false
  episode_len_mean: 250.52
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.5051999999999905
  episode_reward_min: -3.1099999999999777
  episodes_this_iter: 5
  episodes_total: 575
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.243338480591773e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8236829148398506
          entropy_coeff: 0.009999999999999998
          kl: 0.006143950299019707
          policy_loss: -0.022033691654602686
          total_loss: -0.013980387068457074
          vf_explained_var: 0.24296249449253082
          vf_loss: 0.016290133860376147
    num_agent_steps_sampled: 157000
    num_agent_steps_trained: 157000
    num_steps_sampled: 157000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,157,4031.86,157000,-2.5052,-2.08,-3.11,250.52


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 158000
  custom_metrics: {}
  date: 2021-11-05_13-39-32
  done: false
  episode_len_mean: 250.6
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.50599999999999
  episode_reward_min: -3.1099999999999777
  episodes_this_iter: 4
  episodes_total: 579
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.243338480591773e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9687730179892646
          entropy_coeff: 0.009999999999999998
          kl: 0.00750172084241064
          policy_loss: -0.015388122035397423
          total_loss: -0.013731792900297376
          vf_explained_var: 0.4081806540489197
          vf_loss: 0.011344058863404725
    num_agent_steps_sampled: 158000
    num_agent_steps_trained: 158000
    num_steps_sampled: 158000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,158,4056.85,158000,-2.506,-2.08,-3.11,250.6


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 159000
  custom_metrics: {}
  date: 2021-11-05_13-39-57
  done: false
  episode_len_mean: 251.04
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.51039999999999
  episode_reward_min: -3.1099999999999777
  episodes_this_iter: 4
  episodes_total: 583
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.243338480591773e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8183708329995473
          entropy_coeff: 0.009999999999999998
          kl: 0.007972006648954514
          policy_loss: -0.008420710927910274
          total_loss: -0.004930271787775887
          vf_explained_var: 0.33080601692199707
          vf_loss: 0.011674147171692715
    num_agent_steps_sampled: 159000
    num_agent_steps_trained: 159000
    num_steps_sampled: 159000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,159,4081.69,159000,-2.5104,-2.08,-3.11,251.04


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 160000
  custom_metrics: {}
  date: 2021-11-05_13-40-22
  done: false
  episode_len_mean: 249.21
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.492099999999991
  episode_reward_min: -3.00999999999998
  episodes_this_iter: 4
  episodes_total: 587
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.243338480591773e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.683575134144889
          entropy_coeff: 0.009999999999999998
          kl: 0.008134075879818504
          policy_loss: 0.00806247124241458
          total_loss: 0.013478967257671885
          vf_explained_var: 0.2066420465707779
          vf_loss: 0.012252248047540585
    num_agent_steps_sampled: 160000
    num_agent_steps_trained: 160000
    num_steps_sampled: 160000
    num_steps_trained: 160

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,160,4106.85,160000,-2.4921,-2.08,-3.01,249.21


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 161000
  custom_metrics: {}
  date: 2021-11-05_13-40-48
  done: false
  episode_len_mean: 247.24
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.472399999999991
  episode_reward_min: -2.869999999999983
  episodes_this_iter: 4
  episodes_total: 591
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.243338480591773e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.6070586403210958
          entropy_coeff: 0.009999999999999998
          kl: 0.004207031085863456
          policy_loss: 0.033278433109323186
          total_loss: 0.039505783261524305
          vf_explained_var: 0.11768187582492828
          vf_loss: 0.012297938081125418
    num_agent_steps_sampled: 161000
    num_agent_steps_trained: 161000
    num_steps_sampled: 161000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,161,4133.04,161000,-2.4724,-2.08,-2.87,247.24


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 162000
  custom_metrics: {}
  date: 2021-11-05_13-41-15
  done: false
  episode_len_mean: 245.41
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.4540999999999915
  episode_reward_min: -2.8399999999999834
  episodes_this_iter: 4
  episodes_total: 595
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1216692402958863e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.5278968536191516
          entropy_coeff: 0.009999999999999998
          kl: 0.0067530542835865974
          policy_loss: -0.03998652100563049
          total_loss: -0.03261587247252464
          vf_explained_var: 0.09689421951770782
          vf_loss: 0.012649617282052834
    num_agent_steps_sampled: 162000
    num_agent_steps_trained: 162000
    num_steps_sampled: 162000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,162,4159.35,162000,-2.4541,-2.08,-2.84,245.41




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 163000
  custom_metrics: {}
  date: 2021-11-05_13-41-57
  done: false
  episode_len_mean: 242.65
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.4264999999999923
  episode_reward_min: -2.7799999999999847
  episodes_this_iter: 5
  episodes_total: 600
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1216692402958863e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.5296232256624434
          entropy_coeff: 0.009999999999999998
          kl: 0.00446126540613155
          policy_loss: -0.00983465015888214
          total_loss: 0.0012833007507854037
          vf_explained_var: 0.11013251543045044
          vf_loss: 0.01641417993232608
    num_agent_steps_sampled: 163000
    num_agent_steps_trained: 163000
    num_steps_sampled: 163000
    num_steps_trained: 163000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,163,4201.79,163000,-2.4265,-2.06,-2.78,242.65


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 164000
  custom_metrics: {}
  date: 2021-11-05_13-42-25
  done: false
  episode_len_mean: 241.1
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.410999999999993
  episode_reward_min: -2.7399999999999856
  episodes_this_iter: 4
  episodes_total: 604
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0608346201479432e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.5741026093562444
          entropy_coeff: 0.009999999999999998
          kl: 0.006578634821027269
          policy_loss: 0.0458981004026201
          total_loss: 0.05075384378433227
          vf_explained_var: 0.10984127223491669
          vf_loss: 0.010596768889162276
    num_agent_steps_sampled: 164000
    num_agent_steps_trained: 164000
    num_steps_sampled: 164000
    num_steps_trained: 164000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,164,4230.06,164000,-2.411,-2.06,-2.74,241.1


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 165000
  custom_metrics: {}
  date: 2021-11-05_13-42-52
  done: false
  episode_len_mean: 240.02
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.400199999999993
  episode_reward_min: -2.6099999999999883
  episodes_this_iter: 4
  episodes_total: 608
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0608346201479432e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.5036576820744408
          entropy_coeff: 0.009999999999999998
          kl: 0.0022811563793156764
          policy_loss: -0.07894169199797842
          total_loss: -0.06839842639035648
          vf_explained_var: 0.09127174317836761
          vf_loss: 0.01557984332450562
    num_agent_steps_sampled: 165000
    num_agent_steps_trained: 165000
    num_steps_sampled: 165000
    num_steps_trained: 165000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,165,4256.68,165000,-2.4002,-2.06,-2.61,240.02


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 166000
  custom_metrics: {}
  date: 2021-11-05_13-43-18
  done: false
  episode_len_mean: 239.22
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.3921999999999928
  episode_reward_min: -2.6099999999999883
  episodes_this_iter: 5
  episodes_total: 613
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.304173100739716e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.5277788559595744
          entropy_coeff: 0.009999999999999998
          kl: 0.005767598429770891
          policy_loss: 0.01512135225865576
          total_loss: 0.02430174657040172
          vf_explained_var: 0.12449944019317627
          vf_loss: 0.014458183188819224
    num_agent_steps_sampled: 166000
    num_agent_steps_trained: 166000
    num_steps_sampled: 166000
    num_steps_trained: 166000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,166,4282.89,166000,-2.3922,-2.06,-2.61,239.22


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 167000
  custom_metrics: {}
  date: 2021-11-05_13-43-45
  done: false
  episode_len_mean: 238.43
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.384299999999993
  episode_reward_min: -2.6099999999999883
  episodes_this_iter: 4
  episodes_total: 617
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.304173100739716e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.35765180024835797
          entropy_coeff: 0.009999999999999998
          kl: 0.007833446012294647
          policy_loss: -0.01546017188164923
          total_loss: -0.005589893046352598
          vf_explained_var: 0.07139139622449875
          vf_loss: 0.013446796995898088
    num_agent_steps_sampled: 167000
    num_agent_steps_trained: 167000
    num_steps_sampled: 167000
    num_steps_trained: 167000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,167,4309.62,167000,-2.3843,-2.06,-2.61,238.43


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 168000
  custom_metrics: {}
  date: 2021-11-05_13-44-11
  done: false
  episode_len_mean: 237.64
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.376399999999993
  episode_reward_min: -2.6099999999999883
  episodes_this_iter: 5
  episodes_total: 622
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.304173100739716e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.39761488835016884
          entropy_coeff: 0.009999999999999998
          kl: 0.00598219150046708
          policy_loss: -0.010486623148123424
          total_loss: 0.0034614894125196667
          vf_explained_var: 0.07655373960733414
          vf_loss: 0.017924261734717423
    num_agent_steps_sampled: 168000
    num_agent_steps_trained: 168000
    num_steps_sampled: 168000
    num_steps_trained: 168000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,168,4335.98,168000,-2.3764,-2.06,-2.61,237.64


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 169000
  custom_metrics: {}
  date: 2021-11-05_13-44-38
  done: false
  episode_len_mean: 237.47
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.3746999999999936
  episode_reward_min: -2.6099999999999883
  episodes_this_iter: 4
  episodes_total: 626
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.304173100739716e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.45592966278394065
          entropy_coeff: 0.009999999999999998
          kl: 0.003294205873406971
          policy_loss: 0.05726670225461324
          total_loss: 0.06422638479206297
          vf_explained_var: 0.09901044517755508
          vf_loss: 0.011518978562930392
    num_agent_steps_sampled: 169000
    num_agent_steps_trained: 169000
    num_steps_sampled: 169000
    num_steps_trained: 169000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,169,4362.21,169000,-2.3747,-2.06,-2.61,237.47




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 170000
  custom_metrics: {}
  date: 2021-11-05_13-45-22
  done: false
  episode_len_mean: 236.98
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.369799999999993
  episode_reward_min: -2.6099999999999883
  episodes_this_iter: 5
  episodes_total: 631
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.652086550369858e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.4926432407564587
          entropy_coeff: 0.009999999999999998
          kl: 0.00632106101328037
          policy_loss: -0.021926499406496682
          total_loss: -0.006156758632924822
          vf_explained_var: 0.07012569159269333
          vf_loss: 0.02069617458101776
    num_agent_steps_sampled: 170000
    num_agent_steps_trained: 170000
    num_steps_sampled: 170000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,170,4406.33,170000,-2.3698,-2.01,-2.61,236.98


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 171000
  custom_metrics: {}
  date: 2021-11-05_13-45-48
  done: false
  episode_len_mean: 236.81
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.3680999999999934
  episode_reward_min: -2.6099999999999883
  episodes_this_iter: 4
  episodes_total: 635
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.652086550369858e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.7472034189436171
          entropy_coeff: 0.009999999999999998
          kl: 0.008881134059534349
          policy_loss: 0.013315613898966048
          total_loss: 0.020028446738918623
          vf_explained_var: 0.19419506192207336
          vf_loss: 0.01418486713535256
    num_agent_steps_sampled: 171000
    num_agent_steps_trained: 171000
    num_steps_sampled: 171000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,171,4432.17,171000,-2.3681,-2.01,-2.61,236.81


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 172000
  custom_metrics: {}
  date: 2021-11-05_13-46-14
  done: false
  episode_len_mean: 236.92
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.3691999999999935
  episode_reward_min: -2.6099999999999883
  episodes_this_iter: 4
  episodes_total: 639
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.652086550369858e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.7318957911597358
          entropy_coeff: 0.009999999999999998
          kl: 0.004146211980034783
          policy_loss: 0.030220864050918155
          total_loss: 0.03651692060132821
          vf_explained_var: 0.17903096973896027
          vf_loss: 0.013615013690044482
    num_agent_steps_sampled: 172000
    num_agent_steps_trained: 172000
    num_steps_sampled: 172000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,172,4458.06,172000,-2.3692,-2.01,-2.61,236.92


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 173000
  custom_metrics: {}
  date: 2021-11-05_13-46-39
  done: false
  episode_len_mean: 236.95
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.3694999999999933
  episode_reward_min: -2.6099999999999883
  episodes_this_iter: 4
  episodes_total: 643
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.326043275184929e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.667663946416643
          entropy_coeff: 0.009999999999999998
          kl: 0.003990509727612195
          policy_loss: 0.02195914275944233
          total_loss: 0.029600259620282385
          vf_explained_var: 0.16113252937793732
          vf_loss: 0.014317756104800437
    num_agent_steps_sampled: 173000
    num_agent_steps_trained: 173000
    num_steps_sampled: 173000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,173,4483.67,173000,-2.3695,-2.01,-2.61,236.95


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 174000
  custom_metrics: {}
  date: 2021-11-05_13-47-05
  done: false
  episode_len_mean: 236.67
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.366699999999993
  episode_reward_min: -2.6099999999999883
  episodes_this_iter: 4
  episodes_total: 647
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.630216375924645e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.6505878640545739
          entropy_coeff: 0.009999999999999998
          kl: 0.004999853343215616
          policy_loss: -0.002950268652704027
          total_loss: 0.0046524391406112245
          vf_explained_var: 0.16275620460510254
          vf_loss: 0.014108587791108422
    num_agent_steps_sampled: 174000
    num_agent_steps_trained: 174000
    num_steps_sampled: 174000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,174,4509.19,174000,-2.3667,-2.01,-2.61,236.67


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 175000
  custom_metrics: {}
  date: 2021-11-05_13-47-31
  done: false
  episode_len_mean: 236.05
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.360499999999993
  episode_reward_min: -2.5899999999999888
  episodes_this_iter: 4
  episodes_total: 651
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3151081879623224e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.6311889602078332
          entropy_coeff: 0.009999999999999998
          kl: 0.005754374059996634
          policy_loss: -0.05319005408220821
          total_loss: -0.045604460934797926
          vf_explained_var: 0.21806876361370087
          vf_loss: 0.013897481571055121
    num_agent_steps_sampled: 175000
    num_agent_steps_trained: 175000
    num_steps_sampled: 175000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,175,4535.18,175000,-2.3605,-2.01,-2.59,236.05


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 176000
  custom_metrics: {}
  date: 2021-11-05_13-47-57
  done: false
  episode_len_mean: 235.6
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.355999999999993
  episode_reward_min: -2.5899999999999888
  episodes_this_iter: 5
  episodes_total: 656
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3151081879623224e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.7043166445361243
          entropy_coeff: 0.009999999999999998
          kl: 0.0033674219295875218
          policy_loss: -0.01984847287336985
          total_loss: -0.009194376029902035
          vf_explained_var: 0.19363901019096375
          vf_loss: 0.01769726076680753
    num_agent_steps_sampled: 176000
    num_agent_steps_trained: 176000
    num_steps_sampled: 176000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,176,4560.95,176000,-2.356,-2.01,-2.59,235.6




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 177000
  custom_metrics: {}
  date: 2021-11-05_13-48-39
  done: false
  episode_len_mean: 234.79
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.3478999999999934
  episode_reward_min: -2.5899999999999888
  episodes_this_iter: 4
  episodes_total: 660
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6575540939811612e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.6283110370238622
          entropy_coeff: 0.009999999999999998
          kl: 0.0043161114793088574
          policy_loss: 0.023408863693475723
          total_loss: 0.031206239180432426
          vf_explained_var: 0.1596861481666565
          vf_loss: 0.014080485794693232
    num_agent_steps_sampled: 177000
    num_agent_steps_trained: 177000
    num_steps_sampled: 177000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,177,4603.59,177000,-2.3479,-2.01,-2.59,234.79


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 178000
  custom_metrics: {}
  date: 2021-11-05_13-49-07
  done: false
  episode_len_mean: 234.87
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.3486999999999942
  episode_reward_min: -2.5899999999999888
  episodes_this_iter: 4
  episodes_total: 664
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.287770469905806e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.6026787175072564
          entropy_coeff: 0.009999999999999998
          kl: 0.005308264976631152
          policy_loss: 0.0005505211651325226
          total_loss: 0.008766021579504013
          vf_explained_var: 0.1685926765203476
          vf_loss: 0.014242290974491173
    num_agent_steps_sampled: 178000
    num_agent_steps_trained: 178000
    num_steps_sampled: 178000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,178,4631.15,178000,-2.3487,-2.01,-2.59,234.87


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 179000
  custom_metrics: {}
  date: 2021-11-05_13-49-33
  done: false
  episode_len_mean: 234.28
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.342799999999994
  episode_reward_min: -2.5899999999999888
  episodes_this_iter: 5
  episodes_total: 669
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.287770469905806e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.5308454768525229
          entropy_coeff: 0.009999999999999998
          kl: 0.00396943897225523
          policy_loss: -0.00953130316403177
          total_loss: 0.00298145968053076
          vf_explained_var: 0.11536981910467148
          vf_loss: 0.017821217825015386
    num_agent_steps_sampled: 179000
    num_agent_steps_trained: 179000
    num_steps_sampled: 179000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,179,4657.5,179000,-2.3428,-2.01,-2.59,234.28


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 180000
  custom_metrics: {}
  date: 2021-11-05_13-49-59
  done: false
  episode_len_mean: 234.24
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.3423999999999943
  episode_reward_min: -2.5899999999999888
  episodes_this_iter: 4
  episodes_total: 673
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.143885234952903e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.7282988276746538
          entropy_coeff: 0.009999999999999998
          kl: 0.008137576265871064
          policy_loss: 0.012494254608949025
          total_loss: 0.019113247551851804
          vf_explained_var: 0.18421633541584015
          vf_loss: 0.013901979786654313
    num_agent_steps_sampled: 180000
    num_agent_steps_trained: 180000
    num_steps_sampled: 180000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,180,4683.29,180000,-2.3424,-2.01,-2.59,234.24


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 181000
  custom_metrics: {}
  date: 2021-11-05_13-50-25
  done: false
  episode_len_mean: 233.91
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.3390999999999935
  episode_reward_min: -2.5899999999999888
  episodes_this_iter: 4
  episodes_total: 677
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.143885234952903e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.6393214801947276
          entropy_coeff: 0.009999999999999998
          kl: 0.0027168431735939674
          policy_loss: 0.04350094265407986
          total_loss: 0.05059328261348936
          vf_explained_var: 0.12247509509325027
          vf_loss: 0.01348555703750915
    num_agent_steps_sampled: 181000
    num_agent_steps_trained: 181000
    num_steps_sampled: 181000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,181,4709.23,181000,-2.3391,-2.01,-2.59,233.91


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 182000
  custom_metrics: {}
  date: 2021-11-05_13-50-51
  done: false
  episode_len_mean: 233.37
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.3336999999999937
  episode_reward_min: -2.5599999999999894
  episodes_this_iter: 4
  episodes_total: 681
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0719426174764515e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.6859287427531349
          entropy_coeff: 0.009999999999999998
          kl: 0.009476322440835676
          policy_loss: 0.0014058228996064927
          total_loss: 0.009813447048266729
          vf_explained_var: 0.13034255802631378
          vf_loss: 0.015266907153030236
    num_agent_steps_sampled: 182000
    num_agent_steps_trained: 182000
    num_steps_sampled: 182000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,182,4735.26,182000,-2.3337,-2.01,-2.56,233.37


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 183000
  custom_metrics: {}
  date: 2021-11-05_13-51-16
  done: false
  episode_len_mean: 233.59
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.335899999999994
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 4
  episodes_total: 685
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0719426174764515e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.815702219804128
          entropy_coeff: 0.009999999999999998
          kl: 0.013303988368257792
          policy_loss: -0.010710909424556627
          total_loss: -0.004627900529238913
          vf_explained_var: 0.2742016315460205
          vf_loss: 0.01424003518703911
    num_agent_steps_sampled: 183000
    num_agent_steps_trained: 183000
    num_steps_sampled: 183000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,183,4760.33,183000,-2.3359,-2.01,-2.57,233.59


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 184000
  custom_metrics: {}
  date: 2021-11-05_13-51-42
  done: false
  episode_len_mean: 234.07
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.340699999999994
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 4
  episodes_total: 689
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0719426174764515e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.7657415555583106
          entropy_coeff: 0.009999999999999998
          kl: 0.009661002852156663
          policy_loss: -0.019447969107164276
          total_loss: -0.013670036776198281
          vf_explained_var: 0.3287248909473419
          vf_loss: 0.013435347585214509
    num_agent_steps_sampled: 184000
    num_agent_steps_trained: 184000
    num_steps_sampled: 184000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,184,4786.21,184000,-2.3407,-2.01,-2.57,234.07




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 185000
  custom_metrics: {}
  date: 2021-11-05_13-52-26
  done: false
  episode_len_mean: 233.91
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.3390999999999944
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 5
  episodes_total: 694
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0719426174764515e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.6369286974271139
          entropy_coeff: 0.009999999999999998
          kl: 0.005171979542034322
          policy_loss: -0.018419259041547776
          total_loss: -0.008111255533165402
          vf_explained_var: 0.2449359893798828
          vf_loss: 0.01667728965274162
    num_agent_steps_sampled: 185000
    num_agent_steps_trained: 185000
    num_steps_sampled: 185000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,185,4830.33,185000,-2.3391,-2.01,-2.57,233.91


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 186000
  custom_metrics: {}
  date: 2021-11-05_13-52-52
  done: false
  episode_len_mean: 234.12
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.341199999999994
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 4
  episodes_total: 698
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0719426174764515e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.5257167832718955
          entropy_coeff: 0.009999999999999998
          kl: 0.010500768388526578
          policy_loss: 0.045098782744672564
          total_loss: 0.050274917980035146
          vf_explained_var: 0.2230936735868454
          vf_loss: 0.010433301050215959
    num_agent_steps_sampled: 186000
    num_agent_steps_trained: 186000
    num_steps_sampled: 186000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,186,4856.09,186000,-2.3412,-2.01,-2.57,234.12


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 187000
  custom_metrics: {}
  date: 2021-11-05_13-53-18
  done: false
  episode_len_mean: 234.33
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.343299999999994
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 4
  episodes_total: 702
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0719426174764515e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.3631708734565311
          entropy_coeff: 0.009999999999999998
          kl: 0.005529699267430033
          policy_loss: 0.03957499158051279
          total_loss: 0.04822082602315479
          vf_explained_var: 0.12421746551990509
          vf_loss: 0.012277540828411778
    num_agent_steps_sampled: 187000
    num_agent_steps_trained: 187000
    num_steps_sampled: 187000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,187,4882.49,187000,-2.3433,-2.01,-2.57,234.33


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 188000
  custom_metrics: {}
  date: 2021-11-05_13-53-45
  done: false
  episode_len_mean: 234.41
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.344099999999994
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 5
  episodes_total: 707
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0719426174764515e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.41752979391151
          entropy_coeff: 0.009999999999999998
          kl: 0.009596147098763571
          policy_loss: -0.017786120209428997
          total_loss: -0.003045998016993205
          vf_explained_var: 0.09367639571428299
          vf_loss: 0.0189154207913412
    num_agent_steps_sampled: 188000
    num_agent_steps_trained: 188000
    num_steps_sampled: 188000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,188,4909.13,188000,-2.3441,-2.01,-2.57,234.41


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 189000
  custom_metrics: {}
  date: 2021-11-05_13-54-11
  done: false
  episode_len_mean: 234.62
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.3461999999999934
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 4
  episodes_total: 711
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0719426174764515e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.6889488269885381
          entropy_coeff: 0.009999999999999998
          kl: 0.01755833119879766
          policy_loss: 0.03383192535903719
          total_loss: 0.041033066146903566
          vf_explained_var: 0.1347283571958542
          vf_loss: 0.014090628766765197
    num_agent_steps_sampled: 189000
    num_agent_steps_trained: 189000
    num_steps_sampled: 189000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,189,4935.17,189000,-2.3462,-2.01,-2.57,234.62


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 190000
  custom_metrics: {}
  date: 2021-11-05_13-54-37
  done: false
  episode_len_mean: 235.39
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.353899999999993
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 4
  episodes_total: 715
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0719426174764515e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.7710171805487739
          entropy_coeff: 0.009999999999999998
          kl: 0.009383088546304721
          policy_loss: 0.021188941018448936
          total_loss: 0.028285601403978136
          vf_explained_var: 0.15161488950252533
          vf_loss: 0.014806833376901017
    num_agent_steps_sampled: 190000
    num_agent_steps_trained: 190000
    num_steps_sampled: 190000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,190,4960.63,190000,-2.3539,-2.01,-2.57,235.39


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 191000
  custom_metrics: {}
  date: 2021-11-05_13-55-02
  done: false
  episode_len_mean: 235.92
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.3591999999999933
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 4
  episodes_total: 719
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0719426174764515e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.7370011614428627
          entropy_coeff: 0.009999999999999998
          kl: 0.01219217321271431
          policy_loss: 0.029439872627456985
          total_loss: 0.03679016054504448
          vf_explained_var: 0.18172796070575714
          vf_loss: 0.014720297542711099
    num_agent_steps_sampled: 191000
    num_agent_steps_trained: 191000
    num_steps_sampled: 191000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,191,4986.06,191000,-2.3592,-2.01,-2.57,235.92




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 192000
  custom_metrics: {}
  date: 2021-11-05_13-55-45
  done: false
  episode_len_mean: 237.26
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.372599999999993
  episode_reward_min: -3.2099999999999755
  episodes_this_iter: 4
  episodes_total: 723
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0719426174764515e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.137811295191447
          entropy_coeff: 0.009999999999999998
          kl: 0.029045894046603057
          policy_loss: 0.0022987805306911467
          total_loss: 0.005106730924712287
          vf_explained_var: 0.2992064654827118
          vf_loss: 0.014186067993028297
    num_agent_steps_sampled: 192000
    num_agent_steps_trained: 192000
    num_steps_sampled: 192000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,192,5028.68,192000,-2.3726,-2.01,-3.21,237.26


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 193000
  custom_metrics: {}
  date: 2021-11-05_13-56-09
  done: false
  episode_len_mean: 238.82
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.3881999999999923
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 727
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.107913926214678e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.3194200992584229
          entropy_coeff: 0.009999999999999998
          kl: 0.033201948307800364
          policy_loss: -0.00587011757824156
          total_loss: -0.006322735713587867
          vf_explained_var: 0.4545483887195587
          vf_loss: 0.01274158339947462
    num_agent_steps_sampled: 193000
    num_agent_steps_trained: 193000
    num_steps_sampled: 193000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,193,5053.41,193000,-2.3882,-2.01,-3.41,238.82


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 194000
  custom_metrics: {}
  date: 2021-11-05_13-56-35
  done: false
  episode_len_mean: 239.7
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.3969999999999927
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 731
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.661870889322018e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.8968892627292209
          entropy_coeff: 0.009999999999999998
          kl: 0.007302317879243327
          policy_loss: 0.0038595225248071884
          total_loss: 0.0076855785730812285
          vf_explained_var: 0.3620249927043915
          vf_loss: 0.012794950407826237
    num_agent_steps_sampled: 194000
    num_agent_steps_trained: 194000
    num_steps_sampled: 194000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,194,5079,194000,-2.397,-2.01,-3.41,239.7


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 195000
  custom_metrics: {}
  date: 2021-11-05_13-57-01
  done: false
  episode_len_mean: 239.97
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.399699999999993
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 735
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.661870889322018e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.8397204703754849
          entropy_coeff: 0.009999999999999998
          kl: 0.006797649131306748
          policy_loss: 0.0013682994991540908
          total_loss: 0.005366538796159956
          vf_explained_var: 0.31652402877807617
          vf_loss: 0.012395444388190905
    num_agent_steps_sampled: 195000
    num_agent_steps_trained: 195000
    num_steps_sampled: 195000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,195,5104.53,195000,-2.3997,-2.01,-3.41,239.97


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 196000
  custom_metrics: {}
  date: 2021-11-05_13-57-26
  done: false
  episode_len_mean: 240.14
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.4013999999999927
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 739
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.661870889322018e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.7647822366820441
          entropy_coeff: 0.009999999999999998
          kl: 0.005927573795144604
          policy_loss: 0.03158718273043633
          total_loss: 0.036231518205669194
          vf_explained_var: 0.28210318088531494
          vf_loss: 0.012292159721255302
    num_agent_steps_sampled: 196000
    num_agent_steps_trained: 196000
    num_steps_sampled: 196000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,196,5129.93,196000,-2.4014,-2.01,-3.41,240.14


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 197000
  custom_metrics: {}
  date: 2021-11-05_13-57-52
  done: false
  episode_len_mean: 240.19
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.4018999999999924
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 743
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.661870889322018e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.6618800391753514
          entropy_coeff: 0.009999999999999998
          kl: 0.014286971211074355
          policy_loss: 0.03954135618276066
          total_loss: 0.045055737015273836
          vf_explained_var: 0.19761084020137787
          vf_loss: 0.01213318006032043
    num_agent_steps_sampled: 197000
    num_agent_steps_trained: 197000
    num_steps_sampled: 197000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,197,5156.07,197000,-2.4019,-2.01,-3.41,240.19


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 198000
  custom_metrics: {}
  date: 2021-11-05_13-58-18
  done: false
  episode_len_mean: 240.12
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.4011999999999927
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 747
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.661870889322018e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.7054742945565118
          entropy_coeff: 0.009999999999999998
          kl: 0.005164818590493859
          policy_loss: 0.0014115905596150291
          total_loss: 0.00879928461379475
          vf_explained_var: 0.13311372697353363
          vf_loss: 0.014442437483618656
    num_agent_steps_sampled: 198000
    num_agent_steps_trained: 198000
    num_steps_sampled: 198000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,198,5182.24,198000,-2.4012,-2.01,-3.41,240.12




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 199000
  custom_metrics: {}
  date: 2021-11-05_13-59-01
  done: false
  episode_len_mean: 239.58
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.395799999999993
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 5
  episodes_total: 752
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.661870889322018e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.6944010251098209
          entropy_coeff: 0.009999999999999998
          kl: 0.010558904621509177
          policy_loss: -0.029743576463725833
          total_loss: -0.01881338854630788
          vf_explained_var: 0.13812676072120667
          vf_loss: 0.01787419718586736
    num_agent_steps_sampled: 199000
    num_agent_steps_trained: 199000
    num_steps_sampled: 199000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,199,5224.8,199000,-2.3958,-2.01,-3.41,239.58


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 200000
  custom_metrics: {}
  date: 2021-11-05_13-59-27
  done: false
  episode_len_mean: 239.78
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.397799999999993
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 756
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.661870889322018e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.8461841079923842
          entropy_coeff: 0.009999999999999998
          kl: 0.007226508089109194
          policy_loss: 0.018390707009368474
          total_loss: 0.023553370063503585
          vf_explained_var: 0.1487186998128891
          vf_loss: 0.013624502583924267
    num_agent_steps_sampled: 200000
    num_agent_steps_trained: 200000
    num_steps_sampled: 200000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,200,5250.33,200000,-2.3978,-2.01,-3.41,239.78


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 201000
  custom_metrics: {}
  date: 2021-11-05_13-59-52
  done: false
  episode_len_mean: 240.21
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.402099999999993
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 760
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.661870889322018e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.7020658195018769
          entropy_coeff: 0.009999999999999998
          kl: 0.004946150592584786
          policy_loss: 0.0393944177362654
          total_loss: 0.044813755734099285
          vf_explained_var: 0.17147324979305267
          vf_loss: 0.012439994865821468
    num_agent_steps_sampled: 201000
    num_agent_steps_trained: 201000
    num_steps_sampled: 201000
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,201,5276.01,201000,-2.4021,-2.1,-3.41,240.21


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 202000
  custom_metrics: {}
  date: 2021-11-05_14-00-18
  done: false
  episode_len_mean: 240.16
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.401599999999993
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 764
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.330935444661009e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.7366763406329685
          entropy_coeff: 0.009999999999999998
          kl: 0.0032733608041487137
          policy_loss: 0.027980132897694906
          total_loss: 0.03389819516903824
          vf_explained_var: 0.11058985441923141
          vf_loss: 0.01328482526457972
    num_agent_steps_sampled: 202000
    num_agent_steps_trained: 202000
    num_steps_sampled: 202000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,202,5302.07,202000,-2.4016,-2.1,-3.41,240.16


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 203000
  custom_metrics: {}
  date: 2021-11-05_14-00-44
  done: false
  episode_len_mean: 240.29
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.4028999999999927
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 768
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1654677223305046e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.7228469570477803
          entropy_coeff: 0.009999999999999998
          kl: 0.00635797108248555
          policy_loss: -0.013250972578922908
          total_loss: -0.00557917488945855
          vf_explained_var: 0.11561299115419388
          vf_loss: 0.01490026582032442
    num_agent_steps_sampled: 203000
    num_agent_steps_trained: 203000
    num_steps_sampled: 203000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,203,5327.74,203000,-2.4029,-2.1,-3.41,240.29


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 204000
  custom_metrics: {}
  date: 2021-11-05_14-01-09
  done: false
  episode_len_mean: 241.01
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.4100999999999924
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 772
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1654677223305046e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.9358732958634695
          entropy_coeff: 0.009999999999999998
          kl: 0.016633576618256207
          policy_loss: -0.016236735714806452
          total_loss: -0.011721619798077478
          vf_explained_var: 0.26497983932495117
          vf_loss: 0.01387384898132748
    num_agent_steps_sampled: 204000
    num_agent_steps_trained: 204000
    num_steps_sampled: 204000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,204,5352.71,204000,-2.4101,-2.1,-3.41,241.01


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 205000
  custom_metrics: {}
  date: 2021-11-05_14-01-34
  done: false
  episode_len_mean: 241.59
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.4158999999999926
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 776
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1654677223305046e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.8303950137562222
          entropy_coeff: 0.009999999999999998
          kl: 0.01282640856169416
          policy_loss: -0.08388804205589824
          total_loss: -0.07844892591238022
          vf_explained_var: 0.2824062705039978
          vf_loss: 0.013743070937279198
    num_agent_steps_sampled: 205000
    num_agent_steps_trained: 205000
    num_steps_sampled: 205000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,205,5377.57,205000,-2.4159,-2.1,-3.41,241.59




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 206000
  custom_metrics: {}
  date: 2021-11-05_14-02-15
  done: false
  episode_len_mean: 241.67
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.416699999999992
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 780
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1654677223305046e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.7394134879112244
          entropy_coeff: 0.009999999999999998
          kl: 0.007755216539764298
          policy_loss: -0.10776876227723228
          total_loss: -0.09680361913310156
          vf_explained_var: 0.25728151202201843
          vf_loss: 0.01835927295808991
    num_agent_steps_sampled: 206000
    num_agent_steps_trained: 206000
    num_steps_sampled: 206000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,206,5418.84,206000,-2.4167,-2.1,-3.41,241.67


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 207000
  custom_metrics: {}
  date: 2021-11-05_14-02-43
  done: false
  episode_len_mean: 241.74
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.417399999999992
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 5
  episodes_total: 785
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1654677223305046e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.7039848850833045
          entropy_coeff: 0.009999999999999998
          kl: 0.005425064080491914
          policy_loss: -0.027915754541754722
          total_loss: -0.019102760901053745
          vf_explained_var: 0.33930346369743347
          vf_loss: 0.01585284305943383
    num_agent_steps_sampled: 207000
    num_agent_steps_trained: 207000
    num_steps_sampled: 207000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,207,5446.5,207000,-2.4174,-2.1,-3.41,241.74


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 208000
  custom_metrics: {}
  date: 2021-11-05_14-03-09
  done: false
  episode_len_mean: 241.14
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.4113999999999924
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 789
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1654677223305046e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.6645573099454244
          entropy_coeff: 0.009999999999999998
          kl: 0.007109398880542865
          policy_loss: 0.01367557719349861
          total_loss: 0.01919272877275944
          vf_explained_var: 0.320404052734375
          vf_loss: 0.01216272579299079
    num_agent_steps_sampled: 208000
    num_agent_steps_trained: 208000
    num_steps_sampled: 208000
    num_steps_trained: 20

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,208,5472.54,208000,-2.4114,-2.1,-3.41,241.14


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 209000
  custom_metrics: {}
  date: 2021-11-05_14-03-36
  done: false
  episode_len_mean: 241.12
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.4111999999999925
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 793
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1654677223305046e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.35581239031420814
          entropy_coeff: 0.009999999999999998
          kl: 0.005411714393429652
          policy_loss: 0.0255819550404946
          total_loss: 0.03479044768545363
          vf_explained_var: 0.15361805260181427
          vf_loss: 0.012766613014456299
    num_agent_steps_sampled: 209000
    num_agent_steps_trained: 209000
    num_steps_sampled: 209000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,209,5499.73,209000,-2.4112,-2.1,-3.41,241.12


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 210000
  custom_metrics: {}
  date: 2021-11-05_14-04-03
  done: false
  episode_len_mean: 240.58
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.4057999999999926
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 5
  episodes_total: 798
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1654677223305046e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.347070355547799
          entropy_coeff: 0.009999999999999998
          kl: 0.003077957078354792
          policy_loss: -0.01267384679781066
          total_loss: 0.00044859424233436587
          vf_explained_var: 0.11190474778413773
          vf_loss: 0.01659314444081651
    num_agent_steps_sampled: 210000
    num_agent_steps_trained: 210000
    num_steps_sampled: 210000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,210,5526.71,210000,-2.4058,-2.1,-3.41,240.58


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 211000
  custom_metrics: {}
  date: 2021-11-05_14-04-31
  done: false
  episode_len_mean: 240.19
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.4018999999999924
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 802
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.827338611652523e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.3140196230676439
          entropy_coeff: 0.009999999999999998
          kl: 0.0034367585253627275
          policy_loss: 0.03346877346436183
          total_loss: 0.04334989835818608
          vf_explained_var: 0.06054243445396423
          vf_loss: 0.01302131868691908
    num_agent_steps_sampled: 211000
    num_agent_steps_trained: 211000
    num_steps_sampled: 211000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,211,5554.11,211000,-2.4019,-2.1,-3.41,240.19


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 212000
  custom_metrics: {}
  date: 2021-11-05_14-04-58
  done: false
  episode_len_mean: 239.9
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.3989999999999925
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 5
  episodes_total: 807
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.9136693058262614e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.3715151223871443
          entropy_coeff: 0.009999999999999998
          kl: 0.005020708246122337
          policy_loss: -0.01747151447667016
          total_loss: -0.004005597366227044
          vf_explained_var: 0.09463505446910858
          vf_loss: 0.017181066289130183
    num_agent_steps_sampled: 212000
    num_agent_steps_trained: 212000
    num_steps_sampled: 212000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,212,5581.1,212000,-2.399,-2.1,-3.41,239.9




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 213000
  custom_metrics: {}
  date: 2021-11-05_14-05-43
  done: false
  episode_len_mean: 239.2
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.3919999999999924
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 811
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.9136693058262614e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.35905841257837084
          entropy_coeff: 0.009999999999999998
          kl: 0.003470427263530951
          policy_loss: 0.034016352478000854
          total_loss: 0.043551488551828596
          vf_explained_var: 0.10744717717170715
          vf_loss: 0.013125720113101933
    num_agent_steps_sampled: 213000
    num_agent_steps_trained: 213000
    num_steps_sampled: 213000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,213,5626.57,213000,-2.392,-1.97,-3.41,239.2


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 214000
  custom_metrics: {}
  date: 2021-11-05_14-06-10
  done: false
  episode_len_mean: 238.21
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.382099999999993
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 5
  episodes_total: 816
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4568346529131307e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.39697239730093214
          entropy_coeff: 0.009999999999999998
          kl: 0.006370736127804279
          policy_loss: -0.016447422405083973
          total_loss: -0.0022876347104708356
          vf_explained_var: 0.10732738673686981
          vf_loss: 0.01812951141554448
    num_agent_steps_sampled: 214000
    num_agent_steps_trained: 214000
    num_steps_sampled: 214000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,214,5653.58,214000,-2.3821,-1.97,-3.41,238.21


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 215000
  custom_metrics: {}
  date: 2021-11-05_14-06-37
  done: false
  episode_len_mean: 237.71
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.3770999999999933
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 820
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4568346529131307e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.36683513621489205
          entropy_coeff: 0.009999999999999998
          kl: 0.007679903873615264
          policy_loss: 0.02493005535668797
          total_loss: 0.03423235375020239
          vf_explained_var: 0.12701553106307983
          vf_loss: 0.012970651065309842
    num_agent_steps_sampled: 215000
    num_agent_steps_trained: 215000
    num_steps_sampled: 215000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,215,5680.66,215000,-2.3771,-1.97,-3.41,237.71


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 216000
  custom_metrics: {}
  date: 2021-11-05_14-07-07
  done: false
  episode_len_mean: 235.84
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.3583999999999934
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 5
  episodes_total: 825
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4568346529131307e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.40002344416247476
          entropy_coeff: 0.009999999999999998
          kl: 0.00443114996958174
          policy_loss: -0.020501178171899583
          total_loss: -0.007987891137599946
          vf_explained_var: 0.13239407539367676
          vf_loss: 0.01651352204175459
    num_agent_steps_sampled: 216000
    num_agent_steps_trained: 216000
    num_steps_sampled: 216000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,216,5710.8,216000,-2.3584,-1.97,-3.41,235.84


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 217000
  custom_metrics: {}
  date: 2021-11-05_14-07-34
  done: false
  episode_len_mean: 234.3
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.3429999999999938
  episode_reward_min: -2.769999999999985
  episodes_this_iter: 4
  episodes_total: 829
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.284173264565654e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.5644843730661604
          entropy_coeff: 0.009999999999999998
          kl: 0.01909785059972613
          policy_loss: 0.031020856317546634
          total_loss: 0.038394809348715676
          vf_explained_var: 0.10537687689065933
          vf_loss: 0.013018794730305672
    num_agent_steps_sampled: 217000
    num_agent_steps_trained: 217000
    num_steps_sampled: 217000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,217,5737,217000,-2.343,-1.97,-2.77,234.3


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 218000
  custom_metrics: {}
  date: 2021-11-05_14-08-01
  done: false
  episode_len_mean: 233.67
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.336699999999994
  episode_reward_min: -2.769999999999985
  episodes_this_iter: 4
  episodes_total: 833
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.284173264565654e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.3320290429724587
          entropy_coeff: 0.009999999999999998
          kl: 0.006045164658922254
          policy_loss: -0.05506257994307412
          total_loss: -0.04460537930329641
          vf_explained_var: 0.08273564279079437
          vf_loss: 0.013777489680796862
    num_agent_steps_sampled: 218000
    num_agent_steps_trained: 218000
    num_steps_sampled: 218000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,218,5764.21,218000,-2.3367,-1.97,-2.77,233.67


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 219000
  custom_metrics: {}
  date: 2021-11-05_14-08-28
  done: false
  episode_len_mean: 232.43
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.3242999999999947
  episode_reward_min: -2.769999999999985
  episodes_this_iter: 5
  episodes_total: 838
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.284173264565654e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.33091663635439345
          entropy_coeff: 0.009999999999999998
          kl: 0.008360470269551988
          policy_loss: -0.00975364281071557
          total_loss: 0.005390453421407275
          vf_explained_var: 0.0766679123044014
          vf_loss: 0.018453262456589275
    num_agent_steps_sampled: 219000
    num_agent_steps_trained: 219000
    num_steps_sampled: 219000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,219,5790.9,219000,-2.3243,-1.97,-2.77,232.43




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 220000
  custom_metrics: {}
  date: 2021-11-05_14-09-12
  done: false
  episode_len_mean: 231.52
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.315199999999995
  episode_reward_min: -2.769999999999985
  episodes_this_iter: 4
  episodes_total: 842
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.284173264565654e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.4409753272930781
          entropy_coeff: 0.009999999999999998
          kl: 0.012822224078894095
          policy_loss: -0.016573986576663122
          total_loss: -0.007035731275876363
          vf_explained_var: 0.10597323626279831
          vf_loss: 0.013948010187596083
    num_agent_steps_sampled: 220000
    num_agent_steps_trained: 220000
    num_steps_sampled: 220000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,220,5835.09,220000,-2.3152,-1.97,-2.77,231.52


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 221000
  custom_metrics: {}
  date: 2021-11-05_14-09-38
  done: false
  episode_len_mean: 231.18
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.3117999999999945
  episode_reward_min: -2.769999999999985
  episodes_this_iter: 5
  episodes_total: 847
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.284173264565654e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.6831649528609381
          entropy_coeff: 0.009999999999999998
          kl: 0.011960204827900453
          policy_loss: -0.017259482791026435
          total_loss: -0.007276753584543864
          vf_explained_var: 0.21821865439414978
          vf_loss: 0.016814379528578786
    num_agent_steps_sampled: 221000
    num_agent_steps_trained: 221000
    num_steps_sampled: 221000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,221,5861.14,221000,-2.3118,-1.97,-2.77,231.18


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 222000
  custom_metrics: {}
  date: 2021-11-05_14-10-03
  done: false
  episode_len_mean: 231.96
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.3195999999999946
  episode_reward_min: -2.769999999999985
  episodes_this_iter: 4
  episodes_total: 851
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.284173264565654e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.85643403728803
          entropy_coeff: 0.009999999999999998
          kl: 0.0095088911789561
          policy_loss: 0.025359955430030823
          total_loss: 0.028035427216026518
          vf_explained_var: 0.3693620562553406
          vf_loss: 0.011239815120481783
    num_agent_steps_sampled: 222000
    num_agent_steps_trained: 222000
    num_steps_sampled: 222000
    num_steps_trained: 222

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,222,5886.62,222000,-2.3196,-1.97,-2.77,231.96


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 223000
  custom_metrics: {}
  date: 2021-11-05_14-10-30
  done: false
  episode_len_mean: 231.6
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.3159999999999945
  episode_reward_min: -2.769999999999985
  episodes_this_iter: 4
  episodes_total: 855
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.284173264565654e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.4451100362671746
          entropy_coeff: 0.009999999999999998
          kl: 0.014970024048428362
          policy_loss: 0.04845394293467204
          total_loss: 0.055889350920915605
          vf_explained_var: 0.13767442107200623
          vf_loss: 0.011886506486270163
    num_agent_steps_sampled: 223000
    num_agent_steps_trained: 223000
    num_steps_sampled: 223000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,223,5913.64,223000,-2.316,-1.97,-2.77,231.6


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 224000
  custom_metrics: {}
  date: 2021-11-05_14-10-57
  done: false
  episode_len_mean: 231.13
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.3112999999999944
  episode_reward_min: -2.769999999999985
  episodes_this_iter: 4
  episodes_total: 859
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.284173264565654e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.35448586212264166
          entropy_coeff: 0.009999999999999998
          kl: 0.011107118127678423
          policy_loss: -0.09560461830761698
          total_loss: -0.08405065304703183
          vf_explained_var: 0.141141876578331
          vf_loss: 0.015098822644601266
    num_agent_steps_sampled: 224000
    num_agent_steps_trained: 224000
    num_steps_sampled: 224000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,224,5940.39,224000,-2.3113,-1.97,-2.77,231.13


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 225000
  custom_metrics: {}
  date: 2021-11-05_14-11-24
  done: false
  episode_len_mean: 230.53
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.305299999999995
  episode_reward_min: -2.769999999999985
  episodes_this_iter: 5
  episodes_total: 864
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.284173264565654e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.3527689195341534
          entropy_coeff: 0.009999999999999998
          kl: 0.006338075289708812
          policy_loss: 0.008358427054352231
          total_loss: 0.0195498485945993
          vf_explained_var: 0.11744619905948639
          vf_loss: 0.014719112165686157
    num_agent_steps_sampled: 225000
    num_agent_steps_trained: 225000
    num_steps_sampled: 225000
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,225,5967.15,225000,-2.3053,-1.97,-2.77,230.53


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 226000
  custom_metrics: {}
  date: 2021-11-05_14-11-51
  done: false
  episode_len_mean: 229.96
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.2995999999999945
  episode_reward_min: -2.769999999999985
  episodes_this_iter: 4
  episodes_total: 868
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.284173264565654e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.24759427226252026
          entropy_coeff: 0.009999999999999998
          kl: 0.0035121269887706044
          policy_loss: -0.012829961793290243
          total_loss: -0.002605633685986201
          vf_explained_var: 0.0717696025967598
          vf_loss: 0.012700270116329192
    num_agent_steps_sampled: 226000
    num_agent_steps_trained: 226000
    num_steps_sampled: 226000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,226,5994.38,226000,-2.2996,-1.97,-2.77,229.96




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 227000
  custom_metrics: {}
  date: 2021-11-05_14-12-37
  done: false
  episode_len_mean: 228.52
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.2851999999999957
  episode_reward_min: -2.769999999999985
  episodes_this_iter: 5
  episodes_total: 873
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.642086632282827e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.3137573377953635
          entropy_coeff: 0.009999999999999998
          kl: 0.003735186330926712
          policy_loss: 0.0035044233832094403
          total_loss: 0.017183481570747165
          vf_explained_var: 0.07095558196306229
          vf_loss: 0.016816629469394682
    num_agent_steps_sampled: 227000
    num_agent_steps_trained: 227000
    num_steps_sampled: 227000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,227,6039.64,227000,-2.2852,-1.93,-2.77,228.52


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 228000
  custom_metrics: {}
  date: 2021-11-05_14-13-04
  done: false
  episode_len_mean: 227.53
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.2752999999999957
  episode_reward_min: -2.769999999999985
  episodes_this_iter: 4
  episodes_total: 877
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8210433161414134e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.29808981468280155
          entropy_coeff: 0.009999999999999998
          kl: 0.004425154272148054
          policy_loss: -0.02750978800985548
          total_loss: -0.018193453715907203
          vf_explained_var: 0.1308273822069168
          vf_loss: 0.01229723034840491
    num_agent_steps_sampled: 228000
    num_agent_steps_trained: 228000
    num_steps_sampled: 228000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,228,6067.21,228000,-2.2753,-1.93,-2.77,227.53


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 229000
  custom_metrics: {}
  date: 2021-11-05_14-13-32
  done: false
  episode_len_mean: 226.26
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.262599999999996
  episode_reward_min: -2.5999999999999885
  episodes_this_iter: 5
  episodes_total: 882
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.105216580707067e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.2952513467934397
          entropy_coeff: 0.009999999999999998
          kl: 0.001925551981960341
          policy_loss: 0.005673718535237842
          total_loss: 0.017981657220257654
          vf_explained_var: 0.10102161765098572
          vf_loss: 0.015260452684015036
    num_agent_steps_sampled: 229000
    num_agent_steps_trained: 229000
    num_steps_sampled: 229000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,229,6094.5,229000,-2.2626,-1.93,-2.6,226.26


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 230000
  custom_metrics: {}
  date: 2021-11-05_14-13-59
  done: false
  episode_len_mean: 225.33
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.253299999999996
  episode_reward_min: -2.5999999999999885
  episodes_this_iter: 5
  episodes_total: 887
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.5526082903535335e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.20275339401430553
          entropy_coeff: 0.009999999999999998
          kl: 0.0019803091855317
          policy_loss: -0.00484699590338601
          total_loss: 0.009618949641784032
          vf_explained_var: 0.041896719485521317
          vf_loss: 0.016493481770157813
    num_agent_steps_sampled: 230000
    num_agent_steps_trained: 230000
    num_steps_sampled: 230000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,230,6122.12,230000,-2.2533,-1.93,-2.6,225.33


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 231000
  custom_metrics: {}
  date: 2021-11-05_14-14-27
  done: false
  episode_len_mean: 224.74
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.2473999999999963
  episode_reward_min: -2.5999999999999885
  episodes_this_iter: 4
  episodes_total: 891
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2763041451767667e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.22435080111026764
          entropy_coeff: 0.009999999999999998
          kl: 0.002737446544519015
          policy_loss: 0.03918277745445569
          total_loss: 0.04814355481002066
          vf_explained_var: 0.05725305527448654
          vf_loss: 0.01120428375692831
    num_agent_steps_sampled: 231000
    num_agent_steps_trained: 231000
    num_steps_sampled: 231000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,231,6149.73,231000,-2.2474,-1.93,-2.6,224.74


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 232000
  custom_metrics: {}
  date: 2021-11-05_14-14-54
  done: false
  episode_len_mean: 224.43
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.244299999999996
  episode_reward_min: -2.5999999999999885
  episodes_this_iter: 5
  episodes_total: 896
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1381520725883834e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.24457396186060376
          entropy_coeff: 0.009999999999999998
          kl: 0.004485231658511819
          policy_loss: -0.013312878128555085
          total_loss: 7.930215862062242e-05
          vf_explained_var: 0.05818315967917442
          vf_loss: 0.015837920477820767
    num_agent_steps_sampled: 232000
    num_agent_steps_trained: 232000
    num_steps_sampled: 232000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,232,6177.21,232000,-2.2443,-1.93,-2.6,224.43




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 233000
  custom_metrics: {}
  date: 2021-11-05_14-15-39
  done: false
  episode_len_mean: 224.01
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.240099999999996
  episode_reward_min: -2.5999999999999885
  episodes_this_iter: 4
  episodes_total: 900
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.690760362941917e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.23433098793029786
          entropy_coeff: 0.009999999999999998
          kl: 0.005039759721299377
          policy_loss: -0.015069056550661724
          total_loss: -0.005039218150907092
          vf_explained_var: 0.04756789654493332
          vf_loss: 0.012373153037495083
    num_agent_steps_sampled: 233000
    num_agent_steps_trained: 233000
    num_steps_sampled: 233000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,233,6221.99,233000,-2.2401,-1.93,-2.6,224.01


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 234000
  custom_metrics: {}
  date: 2021-11-05_14-16-06
  done: false
  episode_len_mean: 223.94
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.239399999999996
  episode_reward_min: -2.5999999999999885
  episodes_this_iter: 5
  episodes_total: 905
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.690760362941917e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.368015409178204
          entropy_coeff: 0.009999999999999998
          kl: 0.01476876925806556
          policy_loss: -0.017486784358819327
          total_loss: -0.004470607141653696
          vf_explained_var: 0.04888588562607765
          vf_loss: 0.01669633456816276
    num_agent_steps_sampled: 234000
    num_agent_steps_trained: 234000
    num_steps_sampled: 234000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,234,6248.92,234000,-2.2394,-1.93,-2.6,223.94


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 235000
  custom_metrics: {}
  date: 2021-11-05_14-16-33
  done: false
  episode_len_mean: 224.1
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.240999999999996
  episode_reward_min: -2.5999999999999885
  episodes_this_iter: 4
  episodes_total: 909
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.690760362941917e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.41776431169774797
          entropy_coeff: 0.009999999999999998
          kl: 0.002810033189196185
          policy_loss: 0.027842510408825346
          total_loss: 0.03501037524806128
          vf_explained_var: 0.04694264382123947
          vf_loss: 0.01134550638186435
    num_agent_steps_sampled: 235000
    num_agent_steps_trained: 235000
    num_steps_sampled: 235000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,235,6275.74,235000,-2.241,-1.93,-2.6,224.1


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 236000
  custom_metrics: {}
  date: 2021-11-05_14-16-59
  done: false
  episode_len_mean: 224.59
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.2458999999999962
  episode_reward_min: -2.5999999999999885
  episodes_this_iter: 5
  episodes_total: 914
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8453801814709584e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.534551923804813
          entropy_coeff: 0.009999999999999998
          kl: 0.03790940318442119
          policy_loss: -0.0227003442744414
          total_loss: -0.011372641391224332
          vf_explained_var: 0.12910564243793488
          vf_loss: 0.016673222246269385
    num_agent_steps_sampled: 236000
    num_agent_steps_trained: 236000
    num_steps_sampled: 236000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,236,6302.21,236000,-2.2459,-1.93,-2.6,224.59


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 237000
  custom_metrics: {}
  date: 2021-11-05_14-17-24
  done: false
  episode_len_mean: 225.42
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.254199999999996
  episode_reward_min: -2.5999999999999885
  episodes_this_iter: 4
  episodes_total: 918
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.268070272206435e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.5677706797917684
          entropy_coeff: 0.009999999999999998
          kl: 0.01680327405728915
          policy_loss: -0.0007845528009865019
          total_loss: 0.006270815307895343
          vf_explained_var: 0.22986575961112976
          vf_loss: 0.012733074898521105
    num_agent_steps_sampled: 237000
    num_agent_steps_trained: 237000
    num_steps_sampled: 237000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,237,6327.2,237000,-2.2542,-1.93,-2.6,225.42


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 238000
  custom_metrics: {}
  date: 2021-11-05_14-17-50
  done: false
  episode_len_mean: 226.27
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.2626999999999957
  episode_reward_min: -2.5999999999999885
  episodes_this_iter: 4
  episodes_total: 922
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.268070272206435e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.6496657285425398
          entropy_coeff: 0.009999999999999998
          kl: 0.010625113494534667
          policy_loss: 0.01191465225484636
          total_loss: 0.018171744959221945
          vf_explained_var: 0.2649502754211426
          vf_loss: 0.012753748386684392
    num_agent_steps_sampled: 238000
    num_agent_steps_trained: 238000
    num_steps_sampled: 238000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,238,6352.67,238000,-2.2627,-1.93,-2.6,226.27


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 239000
  custom_metrics: {}
  date: 2021-11-05_14-18-15
  done: false
  episode_len_mean: 227.08
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.2707999999999955
  episode_reward_min: -2.5999999999999885
  episodes_this_iter: 4
  episodes_total: 926
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.268070272206435e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.6879722429646387
          entropy_coeff: 0.009999999999999998
          kl: 0.0548421628558833
          policy_loss: 0.031201469691263306
          total_loss: 0.036162418872118
          vf_explained_var: 0.30141910910606384
          vf_loss: 0.011840674477732844
    num_agent_steps_sampled: 239000
    num_agent_steps_trained: 239000
    num_steps_sampled: 239000
    num_steps_trained: 23

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,239,6377.83,239000,-2.2708,-1.93,-2.6,227.08




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 240000
  custom_metrics: {}
  date: 2021-11-05_14-18-59
  done: false
  episode_len_mean: 226.87
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.2686999999999955
  episode_reward_min: -2.5999999999999885
  episodes_this_iter: 4
  episodes_total: 930
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.402105408309657e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.45545803474055396
          entropy_coeff: 0.009999999999999998
          kl: 0.019365268810022703
          policy_loss: -0.004136372026469972
          total_loss: 0.0037606987274355357
          vf_explained_var: 0.20687323808670044
          vf_loss: 0.012451649643480778
    num_agent_steps_sampled: 240000
    num_agent_steps_trained: 240000
    num_steps_sampled: 240000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,240,6421.35,240000,-2.2687,-1.93,-2.6,226.87


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 241000
  custom_metrics: {}
  date: 2021-11-05_14-19-25
  done: false
  episode_len_mean: 227.37
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.2736999999999954
  episode_reward_min: -2.5999999999999885
  episodes_this_iter: 5
  episodes_total: 935
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.402105408309657e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.5489034331507153
          entropy_coeff: 0.009999999999999998
          kl: 0.005539359750639663
          policy_loss: -0.02703693856795629
          total_loss: -0.01643755667739444
          vf_explained_var: 0.15949779748916626
          vf_loss: 0.0160884166860746
    num_agent_steps_sampled: 241000
    num_agent_steps_trained: 241000
    num_steps_sampled: 241000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,241,6447.72,241000,-2.2737,-1.93,-2.6,227.37


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 242000
  custom_metrics: {}
  date: 2021-11-05_14-19-51
  done: false
  episode_len_mean: 227.78
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.2777999999999956
  episode_reward_min: -2.5999999999999885
  episodes_this_iter: 4
  episodes_total: 939
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.402105408309657e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.9527885410520766
          entropy_coeff: 0.009999999999999998
          kl: 0.0741472931423126
          policy_loss: 0.01923394426703453
          total_loss: 0.021968575649791294
          vf_explained_var: 0.19538451731204987
          vf_loss: 0.012262516861988438
    num_agent_steps_sampled: 242000
    num_agent_steps_trained: 242000
    num_steps_sampled: 242000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,242,6473.56,242000,-2.2778,-1.93,-2.6,227.78


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 243000
  custom_metrics: {}
  date: 2021-11-05_14-20-14
  done: false
  episode_len_mean: 229.62
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.296199999999995
  episode_reward_min: -2.7999999999999843
  episodes_this_iter: 3
  episodes_total: 942
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.603158112464485e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.2194272041320802
          entropy_coeff: 0.009999999999999998
          kl: 0.01665204815053711
          policy_loss: -0.07885080517994034
          total_loss: -0.07976095875104268
          vf_explained_var: 0.05682602524757385
          vf_loss: 0.011284116588325965
    num_agent_steps_sampled: 243000
    num_agent_steps_trained: 243000
    num_steps_sampled: 243000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,243,6496.29,243000,-2.2962,-1.93,-2.8,229.62


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 244000
  custom_metrics: {}
  date: 2021-11-05_14-20-38
  done: false
  episode_len_mean: 230.83
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.3082999999999942
  episode_reward_min: -2.7999999999999843
  episodes_this_iter: 4
  episodes_total: 946
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.603158112464485e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.0715977324379815
          entropy_coeff: 0.009999999999999998
          kl: 0.012996966337481158
          policy_loss: -0.015173072119553883
          total_loss: -0.012287190225389268
          vf_explained_var: 0.07309377193450928
          vf_loss: 0.013601860393666558
    num_agent_steps_sampled: 244000
    num_agent_steps_trained: 244000
    num_steps_sampled: 244000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,244,6520.22,244000,-2.3083,-1.93,-2.8,230.83


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 245000
  custom_metrics: {}
  date: 2021-11-05_14-21-02
  done: false
  episode_len_mean: 231.27
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.312699999999994
  episode_reward_min: -2.7999999999999843
  episodes_this_iter: 4
  episodes_total: 950
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.603158112464485e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.0529122511545816
          entropy_coeff: 0.009999999999999998
          kl: 0.011054420697158522
          policy_loss: -0.004422694444656372
          total_loss: -0.001458084417714013
          vf_explained_var: 0.10859591513872147
          vf_loss: 0.013493732580294212
    num_agent_steps_sampled: 245000
    num_agent_steps_trained: 245000
    num_steps_sampled: 245000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,245,6544.14,245000,-2.3127,-1.93,-2.8,231.27


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 246000
  custom_metrics: {}
  date: 2021-11-05_14-21-25
  done: false
  episode_len_mean: 232.54
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.3253999999999944
  episode_reward_min: -2.7999999999999843
  episodes_this_iter: 4
  episodes_total: 954
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.603158112464485e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.0170684198538462
          entropy_coeff: 0.009999999999999998
          kl: 0.0163158099261574
          policy_loss: 0.023233460552162594
          total_loss: 0.025712252656618754
          vf_explained_var: 0.10114424675703049
          vf_loss: 0.012649476755824354
    num_agent_steps_sampled: 246000
    num_agent_steps_trained: 246000
    num_steps_sampled: 246000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,246,6567.79,246000,-2.3254,-1.93,-2.8,232.54


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 247000
  custom_metrics: {}
  date: 2021-11-05_14-21-49
  done: false
  episode_len_mean: 233.69
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.3368999999999938
  episode_reward_min: -2.7999999999999843
  episodes_this_iter: 4
  episodes_total: 958
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.603158112464485e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.941627460055881
          entropy_coeff: 0.009999999999999998
          kl: 0.006646349413619785
          policy_loss: 0.02286133902768294
          total_loss: 0.026383452407187886
          vf_explained_var: 0.1390385776758194
          vf_loss: 0.012938389223482874
    num_agent_steps_sampled: 247000
    num_agent_steps_trained: 247000
    num_steps_sampled: 247000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,247,6591.83,247000,-2.3369,-1.93,-2.8,233.69




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 248000
  custom_metrics: {}
  date: 2021-11-05_14-22-33
  done: false
  episode_len_mean: 234.11
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.3410999999999937
  episode_reward_min: -2.7999999999999843
  episodes_this_iter: 4
  episodes_total: 962
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.603158112464485e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.9166029810905456
          entropy_coeff: 0.009999999999999998
          kl: 0.00647004823518971
          policy_loss: 0.019642444368865756
          total_loss: 0.02384488855799039
          vf_explained_var: 0.09471151232719421
          vf_loss: 0.01336847303642167
    num_agent_steps_sampled: 248000
    num_agent_steps_trained: 248000
    num_steps_sampled: 248000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,248,6635.84,248000,-2.3411,-1.93,-2.8,234.11


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 249000
  custom_metrics: {}
  date: 2021-11-05_14-22-58
  done: false
  episode_len_mean: 235.15
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.351499999999994
  episode_reward_min: -2.7999999999999843
  episodes_this_iter: 4
  episodes_total: 966
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.603158112464485e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.009343355231815
          entropy_coeff: 0.009999999999999998
          kl: 0.008618012797927972
          policy_loss: 0.01652572618590461
          total_loss: 0.02066666450765398
          vf_explained_var: 0.08906754106283188
          vf_loss: 0.014234371555762158
    num_agent_steps_sampled: 249000
    num_agent_steps_trained: 249000
    num_steps_sampled: 249000
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,249,6660.18,249000,-2.3515,-1.93,-2.8,235.15


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 250000
  custom_metrics: {}
  date: 2021-11-05_14-23-22
  done: false
  episode_len_mean: 236.8
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.3679999999999937
  episode_reward_min: -2.7999999999999843
  episodes_this_iter: 4
  episodes_total: 970
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.603158112464485e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.1554129944907294
          entropy_coeff: 0.009999999999999998
          kl: 0.020442669825472177
          policy_loss: 0.009067154220408864
          total_loss: 0.010500379072295295
          vf_explained_var: 0.3600114583969116
          vf_loss: 0.012987355318748289
    num_agent_steps_sampled: 250000
    num_agent_steps_trained: 250000
    num_steps_sampled: 250000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,250,6684.03,250000,-2.368,-1.95,-2.8,236.8


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 251000
  custom_metrics: {}
  date: 2021-11-05_14-23-44
  done: false
  episode_len_mean: 238.71
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.387099999999993
  episode_reward_min: -2.959999999999981
  episodes_this_iter: 3
  episodes_total: 973
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4404737168696718e-15
          cur_lr: 5.000000000000001e-05
          entropy: 1.5353905624813504
          entropy_coeff: 0.009999999999999998
          kl: 0.020668690236364442
          policy_loss: -0.10634703288475672
          total_loss: -0.10088260347644488
          vf_explained_var: 0.07381491363048553
          vf_loss: 0.02081833463162184
    num_agent_steps_sampled: 251000
    num_agent_steps_trained: 251000
    num_steps_sampled: 251000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,251,6706.34,251000,-2.3871,-1.95,-2.96,238.71


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 252000
  custom_metrics: {}
  date: 2021-11-05_14-24-06
  done: false
  episode_len_mean: 241.25
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.4124999999999925
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 4
  episodes_total: 977
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1607105753045092e-15
          cur_lr: 5.000000000000001e-05
          entropy: 1.5219927522871228
          entropy_coeff: 0.009999999999999998
          kl: 0.022900783887852168
          policy_loss: 0.0018503629912932714
          total_loss: 5.487472646766239e-05
          vf_explained_var: 0.4262714087963104
          vf_loss: 0.013424442315267193
    num_agent_steps_sampled: 252000
    num_agent_steps_trained: 252000
    num_steps_sampled: 252000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,252,6728.25,252000,-2.4125,-1.95,-2.99,241.25


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 253000
  custom_metrics: {}
  date: 2021-11-05_14-24-29
  done: false
  episode_len_mean: 242.95
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.4294999999999924
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 3
  episodes_total: 980
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.241065862956763e-15
          cur_lr: 5.000000000000001e-05
          entropy: 1.2856663041644627
          entropy_coeff: 0.009999999999999998
          kl: 0.008723342825334789
          policy_loss: -0.06307784327202373
          total_loss: -0.06664331878225009
          vf_explained_var: 0.6281176209449768
          vf_loss: 0.009291187953203916
    num_agent_steps_sampled: 253000
    num_agent_steps_trained: 253000
    num_steps_sampled: 253000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,253,6751.79,253000,-2.4295,-1.95,-2.99,242.95


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 254000
  custom_metrics: {}
  date: 2021-11-05_14-24-53
  done: false
  episode_len_mean: 245.06
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.4505999999999917
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 4
  episodes_total: 984
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.241065862956763e-15
          cur_lr: 5.000000000000001e-05
          entropy: 1.3398439698749118
          entropy_coeff: 0.009999999999999998
          kl: 0.011546599450029128
          policy_loss: 0.013274937040275998
          total_loss: 0.012708395057254367
          vf_explained_var: 0.5214856266975403
          vf_loss: 0.012831895136170918
    num_agent_steps_sampled: 254000
    num_agent_steps_trained: 254000
    num_steps_sampled: 254000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,254,6775.42,254000,-2.4506,-1.95,-2.99,245.06


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 255000
  custom_metrics: {}
  date: 2021-11-05_14-25-17
  done: false
  episode_len_mean: 246.66
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.4665999999999912
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 4
  episodes_total: 988
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.241065862956763e-15
          cur_lr: 5.000000000000001e-05
          entropy: 1.400800511572096
          entropy_coeff: 0.009999999999999998
          kl: 0.10395878845000882
          policy_loss: 0.056123407474822466
          total_loss: 0.05173167735338211
          vf_explained_var: 0.7207633852958679
          vf_loss: 0.009616275405925181
    num_agent_steps_sampled: 255000
    num_agent_steps_trained: 255000
    num_steps_sampled: 255000
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,255,6799.75,255000,-2.4666,-1.95,-2.99,246.66




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 256000
  custom_metrics: {}
  date: 2021-11-05_14-25-59
  done: false
  episode_len_mean: 248.64
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.486399999999991
  episode_reward_min: -3.279999999999974
  episodes_this_iter: 3
  episodes_total: 991
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.861598794435144e-15
          cur_lr: 5.000000000000001e-05
          entropy: 1.670793781015608
          entropy_coeff: 0.009999999999999998
          kl: 0.023229002342068917
          policy_loss: -0.13338957362704806
          total_loss: -0.13931861486699845
          vf_explained_var: 0.6215405464172363
          vf_loss: 0.01077890067309555
    num_agent_steps_sampled: 256000
    num_agent_steps_trained: 256000
    num_steps_sampled: 256000
    num_steps_trained: 25

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,256,6841.72,256000,-2.4864,-1.95,-3.28,248.64


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 257000
  custom_metrics: {}
  date: 2021-11-05_14-26-22
  done: false
  episode_len_mean: 251.26
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.51259999999999
  episode_reward_min: -3.279999999999974
  episodes_this_iter: 4
  episodes_total: 995
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.292398191652719e-15
          cur_lr: 5.000000000000001e-05
          entropy: 1.5319692055384317
          entropy_coeff: 0.009999999999999998
          kl: 0.02667018773273679
          policy_loss: 0.05513020037776894
          total_loss: 0.04842418813043171
          vf_explained_var: 0.46564796566963196
          vf_loss: 0.0086136806394077
    num_agent_steps_sampled: 257000
    num_agent_steps_trained: 257000
    num_steps_sampled: 257000
    num_steps_trained: 25700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,257,6863.92,257000,-2.5126,-1.95,-3.28,251.26


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 258000
  custom_metrics: {}
  date: 2021-11-05_14-26-42
  done: false
  episode_len_mean: 254.17
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.54169999999999
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 3
  episodes_total: 998
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0938597287479072e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.5763505286640591
          entropy_coeff: 0.009999999999999998
          kl: 0.008299913311687451
          policy_loss: 0.016482435001267327
          total_loss: 0.009017800622516208
          vf_explained_var: 0.3730461597442627
          vf_loss: 0.008298867852297715
    num_agent_steps_sampled: 258000
    num_agent_steps_trained: 258000
    num_steps_sampled: 258000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,258,6884.38,258000,-2.5417,-1.95,-3.31,254.17


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 259000
  custom_metrics: {}
  date: 2021-11-05_14-27-03
  done: false
  episode_len_mean: 257.04
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.5703999999999896
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 3
  episodes_total: 1001
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0938597287479072e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.4625496851073372
          entropy_coeff: 0.009999999999999998
          kl: 0.010041049170918153
          policy_loss: -0.11640507678190867
          total_loss: -0.12006710436609057
          vf_explained_var: 0.2850131690502167
          vf_loss: 0.010963468781361977
    num_agent_steps_sampled: 259000
    num_agent_steps_trained: 259000
    num_steps_sampled: 259000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,259,6905.37,259000,-2.5704,-1.96,-3.31,257.04


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 260000
  custom_metrics: {}
  date: 2021-11-05_14-27-24
  done: false
  episode_len_mean: 260.54
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.6053999999999884
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 4
  episodes_total: 1005
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0938597287479072e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.4595201055208842
          entropy_coeff: 0.009999999999999998
          kl: 0.009899902496520044
          policy_loss: -0.004972498118877411
          total_loss: -0.009356952003306813
          vf_explained_var: 0.40336841344833374
          vf_loss: 0.01021074407423536
    num_agent_steps_sampled: 260000
    num_agent_steps_trained: 260000
    num_steps_sampled: 260000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,260,6926.33,260000,-2.6054,-1.96,-3.31,260.54


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 261000
  custom_metrics: {}
  date: 2021-11-05_14-27-46
  done: false
  episode_len_mean: 262.48
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.624799999999988
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 3
  episodes_total: 1008
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0938597287479072e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.4117714921633402
          entropy_coeff: 0.009999999999999998
          kl: 0.007225554345315051
          policy_loss: 0.05124260915650262
          total_loss: 0.0442996472120285
          vf_explained_var: 0.6048305630683899
          vf_loss: 0.00717475014195467
    num_agent_steps_sampled: 261000
    num_agent_steps_trained: 261000
    num_steps_sampled: 261000
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,261,6947.96,261000,-2.6248,-1.96,-3.31,262.48


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 262000
  custom_metrics: {}
  date: 2021-11-05_14-28-09
  done: false
  episode_len_mean: 264.7
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.6469999999999874
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 4
  episodes_total: 1012
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0938597287479072e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.4023287031385634
          entropy_coeff: 0.009999999999999998
          kl: 0.007192305072823816
          policy_loss: -0.004593631873528163
          total_loss: -0.009145700434843699
          vf_explained_var: 0.42559221386909485
          vf_loss: 0.009471217460102505
    num_agent_steps_sampled: 262000
    num_agent_steps_trained: 262000
    num_steps_sampled: 262000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,262,6970.7,262000,-2.647,-1.96,-3.31,264.7


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 263000
  custom_metrics: {}
  date: 2021-11-05_14-28-31
  done: false
  episode_len_mean: 266.31
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.6630999999999876
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 3
  episodes_total: 1015
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0938597287479072e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.4894136839442782
          entropy_coeff: 0.009999999999999998
          kl: 0.01089034929753241
          policy_loss: 0.038611591193411086
          total_loss: 0.030598556498686473
          vf_explained_var: 0.4277322590351105
          vf_loss: 0.006881101563986805
    num_agent_steps_sampled: 263000
    num_agent_steps_trained: 263000
    num_steps_sampled: 263000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,263,6992.6,263000,-2.6631,-1.96,-3.31,266.31


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 264000
  custom_metrics: {}
  date: 2021-11-05_14-28-53
  done: false
  episode_len_mean: 267.89
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.6788999999999867
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 4
  episodes_total: 1019
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0938597287479072e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.3635655681292216
          entropy_coeff: 0.009999999999999998
          kl: 0.00882198745517289
          policy_loss: 0.015025118903981314
          total_loss: 0.011655088514089584
          vf_explained_var: 0.2757924497127533
          vf_loss: 0.010265626753163006
    num_agent_steps_sampled: 264000
    num_agent_steps_trained: 264000
    num_steps_sampled: 264000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,264,7014.95,264000,-2.6789,-1.96,-3.31,267.89




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 265000
  custom_metrics: {}
  date: 2021-11-05_14-29-32
  done: false
  episode_len_mean: 269.67
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.6966999999999866
  episode_reward_min: -3.3599999999999723
  episodes_this_iter: 3
  episodes_total: 1022
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0938597287479072e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.5713898340861003
          entropy_coeff: 0.009999999999999998
          kl: 0.015742239884285504
          policy_loss: 0.05225974627667003
          total_loss: 0.045404276251792906
          vf_explained_var: 0.39922574162483215
          vf_loss: 0.008858430005622924
    num_agent_steps_sampled: 265000
    num_agent_steps_trained: 265000
    num_steps_sampled: 265000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,265,7053.83,265000,-2.6967,-1.96,-3.36,269.67


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 266000
  custom_metrics: {}
  date: 2021-11-05_14-29-52
  done: false
  episode_len_mean: 272.21
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.722099999999987
  episode_reward_min: -3.4399999999999706
  episodes_this_iter: 3
  episodes_total: 1025
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0938597287479072e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.5916541801558601
          entropy_coeff: 0.009999999999999998
          kl: 0.008405342960566269
          policy_loss: 0.07947365393241247
          total_loss: 0.07174196243286132
          vf_explained_var: 0.14957372844219208
          vf_loss: 0.008184849913878781
    num_agent_steps_sampled: 266000
    num_agent_steps_trained: 266000
    num_steps_sampled: 266000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,266,7073.67,266000,-2.7221,-1.96,-3.44,272.21


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 267000
  custom_metrics: {}
  date: 2021-11-05_14-30-11
  done: false
  episode_len_mean: 275.31
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.7530999999999852
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 3
  episodes_total: 1028
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0938597287479072e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.5528587129380969
          entropy_coeff: 0.009999999999999998
          kl: 0.01101178704147772
          policy_loss: 0.08335060560041004
          total_loss: 0.07737526686655151
          vf_explained_var: 0.2913714051246643
          vf_loss: 0.009553246815145636
    num_agent_steps_sampled: 267000
    num_agent_steps_trained: 267000
    num_steps_sampled: 267000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,267,7092.61,267000,-2.7531,-1.96,-3.61,275.31


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 268000
  custom_metrics: {}
  date: 2021-11-05_14-30-31
  done: false
  episode_len_mean: 278.17
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.7816999999999847
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 3
  episodes_total: 1031
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0938597287479072e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.479958090517256
          entropy_coeff: 0.009999999999999998
          kl: 0.009830641062348317
          policy_loss: 0.06421007961034775
          total_loss: 0.06010905487669839
          vf_explained_var: 0.11969112604856491
          vf_loss: 0.010698558741003379
    num_agent_steps_sampled: 268000
    num_agent_steps_trained: 268000
    num_steps_sampled: 268000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,268,7112.77,268000,-2.7817,-2.03,-3.61,278.17


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 269000
  custom_metrics: {}
  date: 2021-11-05_14-30-52
  done: false
  episode_len_mean: 280.25
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.8024999999999842
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 3
  episodes_total: 1034
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0938597287479072e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.4039154132207234
          entropy_coeff: 0.009999999999999998
          kl: 0.008550973702585197
          policy_loss: -0.01478554324971305
          total_loss: -0.017240692923466364
          vf_explained_var: -0.01746469922363758
          vf_loss: 0.011584004498500791
    num_agent_steps_sampled: 269000
    num_agent_steps_trained: 269000
    num_steps_sampled: 269000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,269,7133.98,269000,-2.8025,-2.03,-3.61,280.25


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 270000
  custom_metrics: {}
  date: 2021-11-05_14-31-14
  done: false
  episode_len_mean: 282.79
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.827899999999983
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 1038
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0938597287479072e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.5413938257429334
          entropy_coeff: 0.009999999999999998
          kl: 0.013591252758291204
          policy_loss: -0.012814937863085005
          total_loss: -0.014207170738114252
          vf_explained_var: 0.1916164755821228
          vf_loss: 0.014021702762693166
    num_agent_steps_sampled: 270000
    num_agent_steps_trained: 270000
    num_steps_sampled: 270000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,270,7155.48,270000,-2.8279,-2.03,-3.61,282.79


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 271000
  custom_metrics: {}
  date: 2021-11-05_14-31-36
  done: false
  episode_len_mean: 283.93
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.8392999999999824
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 3
  episodes_total: 1041
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0938597287479072e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.5644777470164828
          entropy_coeff: 0.009999999999999998
          kl: 0.011075230707601828
          policy_loss: 0.05422337899605433
          total_loss: 0.04753169905808237
          vf_explained_var: 0.4154702126979828
          vf_loss: 0.008953097033857679
    num_agent_steps_sampled: 271000
    num_agent_steps_trained: 271000
    num_steps_sampled: 271000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,271,7177.39,271000,-2.8393,-2.03,-3.61,283.93


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 272000
  custom_metrics: {}
  date: 2021-11-05_14-31-56
  done: false
  episode_len_mean: 285.37
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.853699999999983
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 3
  episodes_total: 1044
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0938597287479072e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.3934923026296828
          entropy_coeff: 0.009999999999999998
          kl: 0.008769107623472886
          policy_loss: 0.05226086212529076
          total_loss: 0.04849118871821297
          vf_explained_var: 0.4823231101036072
          vf_loss: 0.010165247678782584
    num_agent_steps_sampled: 272000
    num_agent_steps_trained: 272000
    num_steps_sampled: 272000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,272,7197.91,272000,-2.8537,-2.03,-3.61,285.37


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 273000
  custom_metrics: {}
  date: 2021-11-05_14-32-18
  done: false
  episode_len_mean: 286.97
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.869699999999983
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 3
  episodes_total: 1047
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0938597287479072e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.3949955423672995
          entropy_coeff: 0.009999999999999998
          kl: 0.01460871635425563
          policy_loss: -0.09951370830337207
          total_loss: -0.10265111534131897
          vf_explained_var: 0.5696659684181213
          vf_loss: 0.010812547161347337
    num_agent_steps_sampled: 273000
    num_agent_steps_trained: 273000
    num_steps_sampled: 273000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,273,7219.96,273000,-2.8697,-2.03,-3.61,286.97




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 274000
  custom_metrics: {}
  date: 2021-11-05_14-32-59
  done: false
  episode_len_mean: 288.13
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.881299999999982
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 1051
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0938597287479072e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.501288448439704
          entropy_coeff: 0.009999999999999998
          kl: 0.02797740448035763
          policy_loss: 0.006828289479017258
          total_loss: 0.007075035696228345
          vf_explained_var: 0.5709905624389648
          vf_loss: 0.015259631319592397
    num_agent_steps_sampled: 274000
    num_agent_steps_trained: 274000
    num_steps_sampled: 274000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,274,7260.42,274000,-2.8813,-2.03,-3.61,288.13


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 275000
  custom_metrics: {}
  date: 2021-11-05_14-33-21
  done: false
  episode_len_mean: 288.73
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.887299999999982
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 3
  episodes_total: 1054
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6407895931218612e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.2544003672069974
          entropy_coeff: 0.009999999999999998
          kl: 0.015663925345619292
          policy_loss: -0.026411848349703684
          total_loss: -0.02916672569182184
          vf_explained_var: 0.6908217668533325
          vf_loss: 0.0097891286895093
    num_agent_steps_sampled: 275000
    num_agent_steps_trained: 275000
    num_steps_sampled: 275000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,275,7283.08,275000,-2.8873,-2.03,-3.61,288.73


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 276000
  custom_metrics: {}
  date: 2021-11-05_14-33-44
  done: false
  episode_len_mean: 290.05
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.900499999999982
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 1058
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6407895931218612e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.2789599431885614
          entropy_coeff: 0.009999999999999998
          kl: 0.008562640433778017
          policy_loss: 0.026069730189111497
          total_loss: 0.02106030624773767
          vf_explained_var: 0.7722715735435486
          vf_loss: 0.007780178657008542
    num_agent_steps_sampled: 276000
    num_agent_steps_trained: 276000
    num_steps_sampled: 276000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,276,7305.71,276000,-2.9005,-2.03,-3.61,290.05


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 277000
  custom_metrics: {}
  date: 2021-11-05_14-34-08
  done: false
  episode_len_mean: 291.39
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.9138999999999813
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 1062
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6407895931218612e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.1049449112680223
          entropy_coeff: 0.009999999999999998
          kl: 0.008806366404762716
          policy_loss: -0.06675102756255202
          total_loss: -0.06907930659751098
          vf_explained_var: 0.7459909319877625
          vf_loss: 0.008721169317141175
    num_agent_steps_sampled: 277000
    num_agent_steps_trained: 277000
    num_steps_sampled: 277000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,277,7329.42,277000,-2.9139,-2.3,-3.61,291.39


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 278000
  custom_metrics: {}
  date: 2021-11-05_14-34-32
  done: false
  episode_len_mean: 291.56
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.9155999999999818
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 3
  episodes_total: 1065
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6407895931218612e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.0062133292357127
          entropy_coeff: 0.009999999999999998
          kl: 0.009217986105880193
          policy_loss: -0.07329268008470535
          total_loss: -0.07522987708863285
          vf_explained_var: 0.728269100189209
          vf_loss: 0.008124931182505356
    num_agent_steps_sampled: 278000
    num_agent_steps_trained: 278000
    num_steps_sampled: 278000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,278,7353.76,278000,-2.9156,-2.3,-3.61,291.56


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 279000
  custom_metrics: {}
  date: 2021-11-05_14-34-57
  done: false
  episode_len_mean: 291.43
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.9142999999999817
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 1069
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6407895931218612e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.9430010431342655
          entropy_coeff: 0.009999999999999998
          kl: 0.0078948486294563
          policy_loss: -0.035384398036532934
          total_loss: -0.03505467747648557
          vf_explained_var: 0.5744339227676392
          vf_loss: 0.009759727617104847
    num_agent_steps_sampled: 279000
    num_agent_steps_trained: 279000
    num_steps_sampled: 279000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,279,7378.32,279000,-2.9143,-2.3,-3.61,291.43


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 280000
  custom_metrics: {}
  date: 2021-11-05_14-35-19
  done: false
  episode_len_mean: 291.21
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.9120999999999815
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 1073
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6407895931218612e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.206266819106208
          entropy_coeff: 0.009999999999999998
          kl: 0.013317187768745322
          policy_loss: 0.008575931936502457
          total_loss: 0.013091130140754912
          vf_explained_var: 0.21681728959083557
          vf_loss: 0.016577862937831218
    num_agent_steps_sampled: 280000
    num_agent_steps_trained: 280000
    num_steps_sampled: 280000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,280,7400.62,280000,-2.9121,-2.3,-3.61,291.21


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 281000
  custom_metrics: {}
  date: 2021-11-05_14-35-43
  done: false
  episode_len_mean: 290.27
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.902699999999982
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 1077
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6407895931218612e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.9651693291134305
          entropy_coeff: 0.009999999999999998
          kl: 0.006303178576124798
          policy_loss: 0.044564565271139146
          total_loss: 0.04733904716041353
          vf_explained_var: 0.11305532604455948
          vf_loss: 0.012426178343594075
    num_agent_steps_sampled: 281000
    num_agent_steps_trained: 281000
    num_steps_sampled: 281000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,281,7424.21,281000,-2.9027,-2.3,-3.61,290.27




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 282000
  custom_metrics: {}
  date: 2021-11-05_14-36-24
  done: false
  episode_len_mean: 289.42
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.8941999999999823
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 1081
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6407895931218612e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.8495094570848677
          entropy_coeff: 0.009999999999999998
          kl: 0.006640118658323028
          policy_loss: 0.011267736968066957
          total_loss: 0.014463471704059178
          vf_explained_var: 0.29390230774879456
          vf_loss: 0.011690828938865
    num_agent_steps_sampled: 282000
    num_agent_steps_trained: 282000
    num_steps_sampled: 282000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,282,7465.94,282000,-2.8942,-2.22,-3.61,289.42


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 283000
  custom_metrics: {}
  date: 2021-11-05_14-36-49
  done: false
  episode_len_mean: 289.23
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.8922999999999828
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 3
  episodes_total: 1084
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6407895931218612e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.0395371205276913
          entropy_coeff: 0.009999999999999998
          kl: 0.007618799848724311
          policy_loss: -0.10738431844446394
          total_loss: -0.10521057198445002
          vf_explained_var: 0.30951255559921265
          vf_loss: 0.012569114420976904
    num_agent_steps_sampled: 283000
    num_agent_steps_trained: 283000
    num_steps_sampled: 283000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,283,7490.32,283000,-2.8923,-2.22,-3.61,289.23


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 284000
  custom_metrics: {}
  date: 2021-11-05_14-37-13
  done: false
  episode_len_mean: 288.92
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.889199999999983
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 1088
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6407895931218612e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.9674712419509888
          entropy_coeff: 0.009999999999999998
          kl: 0.01000102565948813
          policy_loss: -0.0918692842953735
          total_loss: -0.08896990790963173
          vf_explained_var: 0.48420944809913635
          vf_loss: 0.012574087124731806
    num_agent_steps_sampled: 284000
    num_agent_steps_trained: 284000
    num_steps_sampled: 284000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,284,7514.54,284000,-2.8892,-2.22,-3.61,288.92


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 285000
  custom_metrics: {}
  date: 2021-11-05_14-37-37
  done: false
  episode_len_mean: 287.92
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.8791999999999827
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 1092
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6407895931218612e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.0695657822820874
          entropy_coeff: 0.009999999999999998
          kl: 0.011150923640275007
          policy_loss: 0.008649618923664093
          total_loss: 0.008373602645264731
          vf_explained_var: 0.5865647792816162
          vf_loss: 0.010419640327907272
    num_agent_steps_sampled: 285000
    num_agent_steps_trained: 285000
    num_steps_sampled: 285000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,285,7538.34,285000,-2.8792,-2.22,-3.61,287.92


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 286000
  custom_metrics: {}
  date: 2021-11-05_14-37-59
  done: false
  episode_len_mean: 287.75
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.8774999999999813
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 1096
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6407895931218612e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.3211739314926996
          entropy_coeff: 0.009999999999999998
          kl: 0.025461497935082948
          policy_loss: -0.0041738721231619515
          total_loss: -0.0034397049910492366
          vf_explained_var: 0.19291988015174866
          vf_loss: 0.013945907074958085
    num_agent_steps_sampled: 286000
    num_agent_steps_trained: 286000
    num_steps_sampled: 286000
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,286,7560.04,286000,-2.8775,-2.22,-3.61,287.75


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 287000
  custom_metrics: {}
  date: 2021-11-05_14-38-19
  done: false
  episode_len_mean: 287.5
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.8749999999999822
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 3
  episodes_total: 1099
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.461184389682793e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.3032878743277656
          entropy_coeff: 0.009999999999999998
          kl: 0.013716302848153106
          policy_loss: 0.04784676051802105
          total_loss: 0.044510764380296074
          vf_explained_var: 0.11695785820484161
          vf_loss: 0.009696884711997377
    num_agent_steps_sampled: 287000
    num_agent_steps_trained: 287000
    num_steps_sampled: 287000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,287,7580.7,287000,-2.875,-2.22,-3.61,287.5


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 288000
  custom_metrics: {}
  date: 2021-11-05_14-38-41
  done: false
  episode_len_mean: 287.29
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.872899999999983
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 3
  episodes_total: 1102
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.461184389682793e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.2878049823972915
          entropy_coeff: 0.009999999999999998
          kl: 0.00647085200243531
          policy_loss: -0.046725527445475264
          total_loss: -0.04747358138362567
          vf_explained_var: 0.0640927106142044
          vf_loss: 0.012129996173704664
    num_agent_steps_sampled: 288000
    num_agent_steps_trained: 288000
    num_steps_sampled: 288000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,288,7602.65,288000,-2.8729,-2.22,-3.61,287.29


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 289000
  custom_metrics: {}
  date: 2021-11-05_14-39-03
  done: false
  episode_len_mean: 286.63
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.866299999999983
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 1106
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.461184389682793e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.2288169026374818
          entropy_coeff: 0.009999999999999998
          kl: 0.012806080603570812
          policy_loss: -0.0006911463207668728
          total_loss: 0.0015957099696000418
          vf_explained_var: 0.09321928769350052
          vf_loss: 0.01457502327652441
    num_agent_steps_sampled: 289000
    num_agent_steps_trained: 289000
    num_steps_sampled: 289000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,289,7624.16,289000,-2.8663,-2.22,-3.61,286.63


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 290000
  custom_metrics: {}
  date: 2021-11-05_14-39-26
  done: false
  episode_len_mean: 286.47
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.8646999999999827
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 3
  episodes_total: 1109
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.461184389682793e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.0537885593043432
          entropy_coeff: 0.009999999999999998
          kl: 0.010328534759314442
          policy_loss: -0.041954834510882694
          total_loss: -0.04156576328807407
          vf_explained_var: 0.03373483195900917
          vf_loss: 0.010926956880009837
    num_agent_steps_sampled: 290000
    num_agent_steps_trained: 290000
    num_steps_sampled: 290000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,290,7646.93,290000,-2.8647,-2.22,-3.61,286.47




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 291000
  custom_metrics: {}
  date: 2021-11-05_14-40-08
  done: false
  episode_len_mean: 285.4
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.8539999999999823
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 1113
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.461184389682793e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.0532195607821146
          entropy_coeff: 0.009999999999999998
          kl: 0.005858802140833171
          policy_loss: -0.0027100774149099987
          total_loss: -0.00047918520867824556
          vf_explained_var: 0.15982700884342194
          vf_loss: 0.012763086551179489
    num_agent_steps_sampled: 291000
    num_agent_steps_trained: 291000
    num_steps_sampled: 291000
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,291,7689.11,291000,-2.854,-2.22,-3.61,285.4


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 292000
  custom_metrics: {}
  date: 2021-11-05_14-40-32
  done: false
  episode_len_mean: 284.18
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.8417999999999832
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 1117
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.461184389682793e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.0223356988694934
          entropy_coeff: 0.009999999999999998
          kl: 0.005376235567895529
          policy_loss: -0.019416157073444792
          total_loss: -0.01568983776701821
          vf_explained_var: 0.21385015547275543
          vf_loss: 0.013949674450688891
    num_agent_steps_sampled: 292000
    num_agent_steps_trained: 292000
    num_steps_sampled: 292000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,292,7713.42,292000,-2.8418,-2.22,-3.61,284.18


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 293000
  custom_metrics: {}
  date: 2021-11-05_14-40-57
  done: false
  episode_len_mean: 282.53
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.825299999999984
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 1121
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.461184389682793e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.1382929709222582
          entropy_coeff: 0.009999999999999998
          kl: 0.014715448090259918
          policy_loss: 0.01900311749842432
          total_loss: 0.018799446440405317
          vf_explained_var: 0.483818382024765
          vf_loss: 0.011179257908629047
    num_agent_steps_sampled: 293000
    num_agent_steps_trained: 293000
    num_steps_sampled: 293000
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,293,7738.2,293000,-2.8253,-2.22,-3.61,282.53


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 294000
  custom_metrics: {}
  date: 2021-11-05_14-41-21
  done: false
  episode_len_mean: 279.95
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.799499999999984
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 1125
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.461184389682793e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.1996514545546637
          entropy_coeff: 0.009999999999999998
          kl: 0.014622088003796642
          policy_loss: 0.026087368031342824
          total_loss: 0.025575267523527144
          vf_explained_var: 0.46243539452552795
          vf_loss: 0.011484415146211784
    num_agent_steps_sampled: 294000
    num_agent_steps_trained: 294000
    num_steps_sampled: 294000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,294,7762.18,294000,-2.7995,-2.22,-3.61,279.95


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 295000
  custom_metrics: {}
  date: 2021-11-05_14-41-45
  done: false
  episode_len_mean: 276.96
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.769599999999985
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 4
  episodes_total: 1129
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.461184389682793e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.1940675338109334
          entropy_coeff: 0.009999999999999998
          kl: 0.010682876884457255
          policy_loss: 0.02685824003484514
          total_loss: 0.02337473142478201
          vf_explained_var: 0.6938896775245667
          vf_loss: 0.008457166913689838
    num_agent_steps_sampled: 295000
    num_agent_steps_trained: 295000
    num_steps_sampled: 295000
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,295,7786.35,295000,-2.7696,-2.22,-3.33,276.96


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 296000
  custom_metrics: {}
  date: 2021-11-05_14-42-10
  done: false
  episode_len_mean: 275.05
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.750499999999984
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 4
  episodes_total: 1133
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.461184389682793e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.1934265984429253
          entropy_coeff: 0.009999999999999998
          kl: 0.01010417035948884
          policy_loss: 0.025691601385672887
          total_loss: 0.028089985582563613
          vf_explained_var: 0.47363248467445374
          vf_loss: 0.01433265052942766
    num_agent_steps_sampled: 296000
    num_agent_steps_trained: 296000
    num_steps_sampled: 296000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,296,7810.65,296000,-2.7505,-2.22,-3.33,275.05


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 297000
  custom_metrics: {}
  date: 2021-11-05_14-42-32
  done: false
  episode_len_mean: 274.45
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.7444999999999853
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 3
  episodes_total: 1136
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.461184389682793e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.1963355753156875
          entropy_coeff: 0.009999999999999998
          kl: 0.00802639392300348
          policy_loss: -0.026090673191679847
          total_loss: -0.02730622531639205
          vf_explained_var: 0.41573265194892883
          vf_loss: 0.01074780514722483
    num_agent_steps_sampled: 297000
    num_agent_steps_trained: 297000
    num_steps_sampled: 297000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,297,7833.55,297000,-2.7445,-2.22,-3.33,274.45




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 298000
  custom_metrics: {}
  date: 2021-11-05_14-43-14
  done: false
  episode_len_mean: 272.79
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.727899999999986
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 4
  episodes_total: 1140
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.461184389682793e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.2206526385413277
          entropy_coeff: 0.009999999999999998
          kl: 0.006462160921386169
          policy_loss: 0.008676922155751122
          total_loss: 0.01016981874903043
          vf_explained_var: 0.4194027781486511
          vf_loss: 0.01369942156597972
    num_agent_steps_sampled: 298000
    num_agent_steps_trained: 298000
    num_steps_sampled: 298000
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,298,7875.23,298000,-2.7279,-2.22,-3.33,272.79


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 299000
  custom_metrics: {}
  date: 2021-11-05_14-43-37
  done: false
  episode_len_mean: 271.39
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.7138999999999864
  episode_reward_min: -3.1999999999999758
  episodes_this_iter: 4
  episodes_total: 1144
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.461184389682793e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.1639547573195563
          entropy_coeff: 0.009999999999999998
          kl: 0.009616293020575985
          policy_loss: 0.005394280867444145
          total_loss: 0.0090215475194984
          vf_explained_var: 0.29614561796188354
          vf_loss: 0.015266815810981724
    num_agent_steps_sampled: 299000
    num_agent_steps_trained: 299000
    num_steps_sampled: 299000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,299,7898.31,299000,-2.7139,-2.22,-3.2,271.39


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 300000
  custom_metrics: {}
  date: 2021-11-05_14-44-00
  done: false
  episode_len_mean: 270.37
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.703699999999987
  episode_reward_min: -3.1999999999999758
  episodes_this_iter: 3
  episodes_total: 1147
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.461184389682793e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.9835466305414836
          entropy_coeff: 0.009999999999999998
          kl: 0.008414755103562281
          policy_loss: 0.009388947238524755
          total_loss: 0.010415033333831362
          vf_explained_var: 0.3222842216491699
          vf_loss: 0.010861552317833735
    num_agent_steps_sampled: 300000
    num_agent_steps_trained: 300000
    num_steps_sampled: 300000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,300,7921.12,300000,-2.7037,-2.22,-3.2,270.37


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 301000
  custom_metrics: {}
  date: 2021-11-05_14-44-24
  done: false
  episode_len_mean: 269.57
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.6956999999999858
  episode_reward_min: -3.1999999999999758
  episodes_this_iter: 4
  episodes_total: 1151
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.461184389682793e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.9414633711179098
          entropy_coeff: 0.009999999999999998
          kl: 0.007598989800259777
          policy_loss: 0.010106192943122651
          total_loss: 0.01091167318324248
          vf_explained_var: 0.5253603458404541
          vf_loss: 0.010220113862305879
    num_agent_steps_sampled: 301000
    num_agent_steps_trained: 301000
    num_steps_sampled: 301000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,301,7944.76,301000,-2.6957,-2.22,-3.2,269.57


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 302000
  custom_metrics: {}
  date: 2021-11-05_14-44-48
  done: false
  episode_len_mean: 267.95
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.6794999999999867
  episode_reward_min: -3.1999999999999758
  episodes_this_iter: 4
  episodes_total: 1155
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.461184389682793e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.8724308325184716
          entropy_coeff: 0.009999999999999998
          kl: 0.010311488978131599
          policy_loss: 0.06760420368777381
          total_loss: 0.06862309078375499
          vf_explained_var: 0.639904260635376
          vf_loss: 0.009743196175744135
    num_agent_steps_sampled: 302000
    num_agent_steps_trained: 302000
    num_steps_sampled: 302000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,302,7969.11,302000,-2.6795,-2.22,-3.2,267.95


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 303000
  custom_metrics: {}
  date: 2021-11-05_14-45-13
  done: false
  episode_len_mean: 266.83
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.6682999999999875
  episode_reward_min: -3.1999999999999758
  episodes_this_iter: 4
  episodes_total: 1159
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.461184389682793e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.9963843239678277
          entropy_coeff: 0.009999999999999998
          kl: 0.0164712139839779
          policy_loss: 0.011935125953621334
          total_loss: 0.010423352320988974
          vf_explained_var: 0.6726453304290771
          vf_loss: 0.008452070204334127
    num_agent_steps_sampled: 303000
    num_agent_steps_trained: 303000
    num_steps_sampled: 303000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,303,7993.47,303000,-2.6683,-2.22,-3.2,266.83


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 304000
  custom_metrics: {}
  date: 2021-11-05_14-45-37
  done: false
  episode_len_mean: 266.5
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.6649999999999876
  episode_reward_min: -3.1999999999999758
  episodes_this_iter: 4
  episodes_total: 1163
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.461184389682793e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.8261618746651543
          entropy_coeff: 0.009999999999999998
          kl: 0.010629316233097165
          policy_loss: 0.03708568149142795
          total_loss: 0.038708492699596614
          vf_explained_var: 0.6516054272651672
          vf_loss: 0.00988442818634212
    num_agent_steps_sampled: 304000
    num_agent_steps_trained: 304000
    num_steps_sampled: 304000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,304,8017.69,304000,-2.665,-2.22,-3.2,266.5


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 305000
  custom_metrics: {}
  date: 2021-11-05_14-46-02
  done: false
  episode_len_mean: 266.2
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.6619999999999875
  episode_reward_min: -3.1999999999999758
  episodes_this_iter: 4
  episodes_total: 1167
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.461184389682793e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.8213026639488008
          entropy_coeff: 0.009999999999999998
          kl: 0.023661862908892634
          policy_loss: 0.06575859619511498
          total_loss: 0.06808407033483187
          vf_explained_var: 0.5975774526596069
          vf_loss: 0.010538500076573755
    num_agent_steps_sampled: 305000
    num_agent_steps_trained: 305000
    num_steps_sampled: 305000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,305,8042.6,305000,-2.662,-2.22,-3.2,266.2




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 306000
  custom_metrics: {}
  date: 2021-11-05_14-46-45
  done: false
  episode_len_mean: 265.48
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.6547999999999865
  episode_reward_min: -3.1999999999999758
  episodes_this_iter: 4
  episodes_total: 1171
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.691776584524187e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.9181680321693421
          entropy_coeff: 0.009999999999999998
          kl: 0.025220543681924883
          policy_loss: 0.0001062471316092544
          total_loss: 0.001599112411753999
          vf_explained_var: 0.4054774045944214
          vf_loss: 0.010674544982612133
    num_agent_steps_sampled: 306000
    num_agent_steps_trained: 306000
    num_steps_sampled: 306000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,306,8085.57,306000,-2.6548,-2.08,-3.2,265.48


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 307000
  custom_metrics: {}
  date: 2021-11-05_14-47-09
  done: false
  episode_len_mean: 264.89
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.6488999999999874
  episode_reward_min: -3.1999999999999758
  episodes_this_iter: 4
  episodes_total: 1175
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.5376648767862814e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.8753094140026304
          entropy_coeff: 0.009999999999999998
          kl: 0.01315569840803524
          policy_loss: 0.032876354621516335
          total_loss: 0.03421036617623435
          vf_explained_var: 0.6219361424446106
          vf_loss: 0.01008710918120212
    num_agent_steps_sampled: 307000
    num_agent_steps_trained: 307000
    num_steps_sampled: 307000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,307,8109.81,307000,-2.6489,-2.08,-3.2,264.89


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 308000
  custom_metrics: {}
  date: 2021-11-05_14-47-34
  done: false
  episode_len_mean: 264.43
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.6442999999999874
  episode_reward_min: -3.1999999999999758
  episodes_this_iter: 4
  episodes_total: 1179
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.5376648767862814e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.793900975253847
          entropy_coeff: 0.009999999999999998
          kl: 0.011351811173905603
          policy_loss: -0.06538219269778993
          total_loss: -0.06219849147730404
          vf_explained_var: 0.4815574586391449
          vf_loss: 0.011122712239416108
    num_agent_steps_sampled: 308000
    num_agent_steps_trained: 308000
    num_steps_sampled: 308000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,308,8134.6,308000,-2.6443,-2.08,-3.2,264.43


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 309000
  custom_metrics: {}
  date: 2021-11-05_14-47-59
  done: false
  episode_len_mean: 263.89
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.638899999999987
  episode_reward_min: -3.1999999999999758
  episodes_this_iter: 4
  episodes_total: 1183
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.5376648767862814e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.8934068196349674
          entropy_coeff: 0.009999999999999998
          kl: 0.04842975259317962
          policy_loss: 0.015200020869572957
          total_loss: 0.017412685768471825
          vf_explained_var: 0.4679867625236511
          vf_loss: 0.011146734054717753
    num_agent_steps_sampled: 309000
    num_agent_steps_trained: 309000
    num_steps_sampled: 309000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,309,8159.53,309000,-2.6389,-2.08,-3.2,263.89


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 310000
  custom_metrics: {}
  date: 2021-11-05_14-48-24
  done: false
  episode_len_mean: 264.01
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.640099999999987
  episode_reward_min: -3.1999999999999758
  episodes_this_iter: 4
  episodes_total: 1187
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.306497315179426e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.8481397933430141
          entropy_coeff: 0.009999999999999998
          kl: 0.01952114338249539
          policy_loss: 0.0050823067625363665
          total_loss: 0.0034992360406451753
          vf_explained_var: 0.7246591448783875
          vf_loss: 0.0068983260490414165
    num_agent_steps_sampled: 310000
    num_agent_steps_trained: 310000
    num_steps_sampled: 310000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,310,8184.58,310000,-2.6401,-2.08,-3.2,264.01


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 311000
  custom_metrics: {}
  date: 2021-11-05_14-48-49
  done: false
  episode_len_mean: 263.55
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.6354999999999875
  episode_reward_min: -3.1999999999999758
  episodes_this_iter: 4
  episodes_total: 1191
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.306497315179426e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.5197756879859501
          entropy_coeff: 0.009999999999999998
          kl: 0.013030871977088054
          policy_loss: 0.004370783807502852
          total_loss: 0.010265940303603808
          vf_explained_var: 0.36435890197753906
          vf_loss: 0.011092910325775545
    num_agent_steps_sampled: 311000
    num_agent_steps_trained: 311000
    num_steps_sampled: 311000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,311,8209.37,311000,-2.6355,-2.08,-3.2,263.55


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 312000
  custom_metrics: {}
  date: 2021-11-05_14-49-15
  done: false
  episode_len_mean: 262.12
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.621199999999988
  episode_reward_min: -3.1999999999999758
  episodes_this_iter: 4
  episodes_total: 1195
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.306497315179426e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.5119109925296571
          entropy_coeff: 0.009999999999999998
          kl: 0.010415767008320307
          policy_loss: 0.03423654217686918
          total_loss: 0.03801877035035028
          vf_explained_var: 0.3821307420730591
          vf_loss: 0.008901337968806426
    num_agent_steps_sampled: 312000
    num_agent_steps_trained: 312000
    num_steps_sampled: 312000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,312,8235.82,312000,-2.6212,-2.08,-3.2,262.12


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 313000
  custom_metrics: {}
  date: 2021-11-05_14-49-40
  done: false
  episode_len_mean: 259.69
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.596899999999988
  episode_reward_min: -3.149999999999977
  episodes_this_iter: 4
  episodes_total: 1199
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.306497315179426e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.7465604351626502
          entropy_coeff: 0.009999999999999998
          kl: 0.038171927181555196
          policy_loss: -0.0017825133270687526
          total_loss: -9.541097614500258e-05
          vf_explained_var: 0.48412254452705383
          vf_loss: 0.009152707540326648
    num_agent_steps_sampled: 313000
    num_agent_steps_trained: 313000
    num_steps_sampled: 313000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,313,8260.29,313000,-2.5969,-2.08,-3.15,259.69




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 314000
  custom_metrics: {}
  date: 2021-11-05_14-50-21
  done: false
  episode_len_mean: 258.41
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.5840999999999887
  episode_reward_min: -3.0199999999999796
  episodes_this_iter: 4
  episodes_total: 1203
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.245974597276913e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.9766057458188798
          entropy_coeff: 0.009999999999999998
          kl: 0.012166596171833178
          policy_loss: 0.03033612800969018
          total_loss: 0.030447389433781304
          vf_explained_var: 0.5467385649681091
          vf_loss: 0.00987732059115337
    num_agent_steps_sampled: 314000
    num_agent_steps_trained: 314000
    num_steps_sampled: 314000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,314,8301.59,314000,-2.5841,-2.08,-3.02,258.41


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 315000
  custom_metrics: {}
  date: 2021-11-05_14-50-43
  done: false
  episode_len_mean: 258.17
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.581699999999989
  episode_reward_min: -2.9399999999999813
  episodes_this_iter: 3
  episodes_total: 1206
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.245974597276913e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.9304224497742123
          entropy_coeff: 0.009999999999999998
          kl: 0.015111225952792514
          policy_loss: -0.0823402779797713
          total_loss: -0.07944116658634609
          vf_explained_var: 0.4790111482143402
          vf_loss: 0.012203335668891669
    num_agent_steps_sampled: 315000
    num_agent_steps_trained: 315000
    num_steps_sampled: 315000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,315,8323.7,315000,-2.5817,-2.08,-2.94,258.17


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 316000
  custom_metrics: {}
  date: 2021-11-05_14-51-05
  done: false
  episode_len_mean: 258.81
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.5880999999999887
  episode_reward_min: -2.99999999999998
  episodes_this_iter: 4
  episodes_total: 1210
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.245974597276913e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.9852148771286011
          entropy_coeff: 0.009999999999999998
          kl: 0.023375794694246308
          policy_loss: 0.028851917965544593
          total_loss: 0.030044503758351007
          vf_explained_var: 0.6002764701843262
          vf_loss: 0.011044732853770256
    num_agent_steps_sampled: 316000
    num_agent_steps_trained: 316000
    num_steps_sampled: 316000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,316,8345.99,316000,-2.5881,-2.08,-3,258.81


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 317000
  custom_metrics: {}
  date: 2021-11-05_14-51-30
  done: false
  episode_len_mean: 259.0
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.589999999999988
  episode_reward_min: -2.99999999999998
  episodes_this_iter: 4
  episodes_total: 1214
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.86896189591537e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.9040889057848188
          entropy_coeff: 0.009999999999999998
          kl: 0.029989131337034142
          policy_loss: -0.014400444345341788
          total_loss: -0.010267477068636153
          vf_explained_var: 0.5560834407806396
          vf_loss: 0.013173851805428663
    num_agent_steps_sampled: 317000
    num_agent_steps_trained: 317000
    num_steps_sampled: 317000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,317,8370.19,317000,-2.59,-2.08,-3,259


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 318000
  custom_metrics: {}
  date: 2021-11-05_14-51-52
  done: false
  episode_len_mean: 259.87
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.598699999999988
  episode_reward_min: -2.99999999999998
  episodes_this_iter: 3
  episodes_total: 1217
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8034428438730557e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.9301843212710487
          entropy_coeff: 0.009999999999999998
          kl: 0.012511349831326513
          policy_loss: 0.014657356755601035
          total_loss: 0.015274490664402644
          vf_explained_var: 0.45836299657821655
          vf_loss: 0.009918976557673886
    num_agent_steps_sampled: 318000
    num_agent_steps_trained: 318000
    num_steps_sampled: 318000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,318,8393.05,318000,-2.5987,-2.08,-3,259.87


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 319000
  custom_metrics: {}
  date: 2021-11-05_14-52-16
  done: false
  episode_len_mean: 260.44
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.604399999999988
  episode_reward_min: -2.99999999999998
  episodes_this_iter: 4
  episodes_total: 1221
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8034428438730557e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.9162692864735921
          entropy_coeff: 0.009999999999999998
          kl: 0.010987168753018988
          policy_loss: 0.025287245213985444
          total_loss: 0.02881950210365984
          vf_explained_var: 0.42801693081855774
          vf_loss: 0.012694951705634594
    num_agent_steps_sampled: 319000
    num_agent_steps_trained: 319000
    num_steps_sampled: 319000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,319,8416.59,319000,-2.6044,-2.08,-3,260.44


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 320000
  custom_metrics: {}
  date: 2021-11-05_14-52-40
  done: false
  episode_len_mean: 261.02
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.6101999999999883
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 4
  episodes_total: 1225
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8034428438730557e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.9567108001973894
          entropy_coeff: 0.009999999999999998
          kl: 0.019122561188479558
          policy_loss: 0.029515200977524123
          total_loss: 0.03277172413137224
          vf_explained_var: 0.3487374484539032
          vf_loss: 0.01282363071416815
    num_agent_steps_sampled: 320000
    num_agent_steps_trained: 320000
    num_steps_sampled: 320000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,320,8440.27,320000,-2.6102,-2.08,-3.12,261.02


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 321000
  custom_metrics: {}
  date: 2021-11-05_14-53-01
  done: false
  episode_len_mean: 262.14
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.6213999999999875
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 3
  episodes_total: 1228
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8034428438730557e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.1999652107556662
          entropy_coeff: 0.009999999999999998
          kl: 0.02461996386888789
          policy_loss: 0.07589859916932053
          total_loss: 0.07424629314078225
          vf_explained_var: 0.3271510601043701
          vf_loss: 0.010347343896541537
    num_agent_steps_sampled: 321000
    num_agent_steps_trained: 321000
    num_steps_sampled: 321000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,321,8460.97,321000,-2.6214,-2.08,-3.26,262.14




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 322000
  custom_metrics: {}
  date: 2021-11-05_14-53-38
  done: false
  episode_len_mean: 263.39
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.6338999999999873
  episode_reward_min: -3.369999999999972
  episodes_this_iter: 3
  episodes_total: 1231
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.205164265809581e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.38951087196668
          entropy_coeff: 0.009999999999999998
          kl: 0.027665513488982975
          policy_loss: 0.006308291190200382
          total_loss: 0.006323680778344472
          vf_explained_var: 0.23543044924736023
          vf_loss: 0.013910493865518623
    num_agent_steps_sampled: 322000
    num_agent_steps_trained: 322000
    num_steps_sampled: 322000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,322,8498.45,322000,-2.6339,-2.08,-3.37,263.39


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 323000
  custom_metrics: {}
  date: 2021-11-05_14-53-59
  done: false
  episode_len_mean: 264.77
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.6476999999999875
  episode_reward_min: -3.419999999999971
  episodes_this_iter: 3
  episodes_total: 1234
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.307746398714376e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.3616848799917434
          entropy_coeff: 0.009999999999999998
          kl: 0.031957617380552486
          policy_loss: -0.10749255989988644
          total_loss: -0.10348798127637969
          vf_explained_var: 0.05379954352974892
          vf_loss: 0.01762142460793257
    num_agent_steps_sampled: 323000
    num_agent_steps_trained: 323000
    num_steps_sampled: 323000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,323,8519.52,323000,-2.6477,-2.08,-3.42,264.77


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 324000
  custom_metrics: {}
  date: 2021-11-05_14-54-19
  done: false
  episode_len_mean: 266.1
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.6609999999999867
  episode_reward_min: -3.419999999999971
  episodes_this_iter: 3
  episodes_total: 1237
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.461619598071563e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.4155298941665226
          entropy_coeff: 0.009999999999999998
          kl: 0.023920669500961247
          policy_loss: -0.11220570645398564
          total_loss: -0.11000980598231157
          vf_explained_var: 0.19088467955589294
          vf_loss: 0.01635119922252165
    num_agent_steps_sampled: 324000
    num_agent_steps_trained: 324000
    num_steps_sampled: 324000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,324,8539.45,324000,-2.661,-2.08,-3.42,266.1


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 325000
  custom_metrics: {}
  date: 2021-11-05_14-54-39
  done: false
  episode_len_mean: 267.74
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.6773999999999862
  episode_reward_min: -3.419999999999971
  episodes_this_iter: 3
  episodes_total: 1240
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4192429397107339e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.438953533437517
          entropy_coeff: 0.009999999999999998
          kl: 0.024157868555048978
          policy_loss: -0.11580358975463444
          total_loss: -0.1150006333986918
          vf_explained_var: 0.33469128608703613
          vf_loss: 0.015192492689109511
    num_agent_steps_sampled: 325000
    num_agent_steps_trained: 325000
    num_steps_sampled: 325000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,325,8559.26,325000,-2.6774,-2.08,-3.42,267.74


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 326000
  custom_metrics: {}
  date: 2021-11-05_14-54-58
  done: false
  episode_len_mean: 269.95
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.699499999999986
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 3
  episodes_total: 1243
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.128864409566102e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.5349737260076735
          entropy_coeff: 0.009999999999999998
          kl: 0.02425734654141903
          policy_loss: -0.106098316444291
          total_loss: -0.10357783801025815
          vf_explained_var: 0.16662724316120148
          vf_loss: 0.01787021549211608
    num_agent_steps_sampled: 326000
    num_agent_steps_trained: 326000
    num_steps_sampled: 326000
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,326,8578.37,326000,-2.6995,-2.08,-3.88,269.95


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 327000
  custom_metrics: {}
  date: 2021-11-05_14-55-19
  done: false
  episode_len_mean: 272.05
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.720499999999985
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 1247
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1932966143491524e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.4322020914819507
          entropy_coeff: 0.009999999999999998
          kl: 0.011039221528684272
          policy_loss: -0.005153128173616198
          total_loss: -0.007362760686212116
          vf_explained_var: 0.4333696663379669
          vf_loss: 0.012112385862403445
    num_agent_steps_sampled: 327000
    num_agent_steps_trained: 327000
    num_steps_sampled: 327000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,327,8598.95,327000,-2.7205,-2.08,-3.88,272.05


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 328000
  custom_metrics: {}
  date: 2021-11-05_14-55-40
  done: false
  episode_len_mean: 272.6
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.725999999999986
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 3
  episodes_total: 1250
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1932966143491524e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.2292670210202534
          entropy_coeff: 0.009999999999999998
          kl: 0.011289561014986462
          policy_loss: 0.028176304035716585
          total_loss: 0.023541194200515748
          vf_explained_var: 0.6328909397125244
          vf_loss: 0.007657562022925251
    num_agent_steps_sampled: 328000
    num_agent_steps_trained: 328000
    num_steps_sampled: 328000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,328,8620.63,328000,-2.726,-2.08,-3.88,272.6


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 329000
  custom_metrics: {}
  date: 2021-11-05_14-56-04
  done: false
  episode_len_mean: 272.84
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.728399999999987
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 1254
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1932966143491524e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.8226616389221615
          entropy_coeff: 0.009999999999999998
          kl: 0.01103450855225745
          policy_loss: 0.03931331137816111
          total_loss: 0.043010793750484784
          vf_explained_var: 0.32016056776046753
          vf_loss: 0.011924098587284486
    num_agent_steps_sampled: 329000
    num_agent_steps_trained: 329000
    num_steps_sampled: 329000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,329,8644.41,329000,-2.7284,-2.08,-3.88,272.84


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 330000
  custom_metrics: {}
  date: 2021-11-05_14-56-30
  done: false
  episode_len_mean: 272.58
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.725799999999986
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 1258
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1932966143491524e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.5862967212994893
          entropy_coeff: 0.009999999999999998
          kl: 0.007299466558206625
          policy_loss: 0.027147284315692053
          total_loss: 0.031743756764464906
          vf_explained_var: 0.332767516374588
          vf_loss: 0.010459438959757487
    num_agent_steps_sampled: 330000
    num_agent_steps_trained: 330000
    num_steps_sampled: 330000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,330,8670.38,330000,-2.7258,-2.08,-3.88,272.58




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 331000
  custom_metrics: {}
  date: 2021-11-05_14-57-15
  done: false
  episode_len_mean: 271.29
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -2.7128999999999865
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 1262
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1932966143491524e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.4853265662988027
          entropy_coeff: 0.009999999999999998
          kl: 0.006610689371566123
          policy_loss: -0.043774183922343786
          total_loss: -0.036322194917334454
          vf_explained_var: 0.15641818940639496
          vf_loss: 0.012305254851364427
    num_agent_steps_sampled: 331000
    num_agent_steps_trained: 331000
    num_steps_sampled: 331000
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,331,8715.08,331000,-2.7129,-2,-3.88,271.29


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 332000
  custom_metrics: {}
  date: 2021-11-05_14-57-40
  done: false
  episode_len_mean: 271.01
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -2.7100999999999864
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 1266
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1932966143491524e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.5559639182355669
          entropy_coeff: 0.009999999999999998
          kl: 0.004952784216794804
          policy_loss: -0.11106510129239824
          total_loss: -0.1028396659427219
          vf_explained_var: 0.19274961948394775
          vf_loss: 0.013785073875139157
    num_agent_steps_sampled: 332000
    num_agent_steps_trained: 332000
    num_steps_sampled: 332000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,332,8740.54,332000,-2.7101,-2,-3.88,271.01


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 333000
  custom_metrics: {}
  date: 2021-11-05_14-58-04
  done: false
  episode_len_mean: 271.77
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -2.7176999999999856
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 1270
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5966483071745762e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.6275853580898709
          entropy_coeff: 0.009999999999999998
          kl: 0.00699567981623801
          policy_loss: 0.00012347429162926145
          total_loss: 0.002663843333721161
          vf_explained_var: 0.4164305329322815
          vf_loss: 0.008816221330521835
    num_agent_steps_sampled: 333000
    num_agent_steps_trained: 333000
    num_steps_sampled: 333000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,333,8764.16,333000,-2.7177,-2,-3.88,271.77


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 334000
  custom_metrics: {}
  date: 2021-11-05_14-58-30
  done: false
  episode_len_mean: 270.86
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -2.708599999999986
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 5
  episodes_total: 1275
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5966483071745762e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.3997003926171197
          entropy_coeff: 0.009999999999999998
          kl: 0.008025291361869809
          policy_loss: -0.009319077018234465
          total_loss: 0.003020425968699985
          vf_explained_var: 0.18050114810466766
          vf_loss: 0.016336504297537937
    num_agent_steps_sampled: 334000
    num_agent_steps_trained: 334000
    num_steps_sampled: 334000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,334,8790.06,334000,-2.7086,-2,-3.88,270.86


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 335000
  custom_metrics: {}
  date: 2021-11-05_14-58-55
  done: false
  episode_len_mean: 270.68
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -2.7067999999999866
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 1279
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5966483071745762e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.4831040855911043
          entropy_coeff: 0.009999999999999998
          kl: 0.0032547314515881786
          policy_loss: -0.0027282192475265926
          total_loss: 0.0004700586199760437
          vf_explained_var: 0.5702101588249207
          vf_loss: 0.00802931843015055
    num_agent_steps_sampled: 335000
    num_agent_steps_trained: 335000
    num_steps_sampled: 335000
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,335,8815.17,335000,-2.7068,-2,-3.88,270.68


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 336000
  custom_metrics: {}
  date: 2021-11-05_14-59-21
  done: false
  episode_len_mean: 269.67
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -2.6966999999999866
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 1283
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.983241535872881e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.26434836437304815
          entropy_coeff: 0.009999999999999998
          kl: 0.0012558782876324651
          policy_loss: 0.027027307781908247
          total_loss: 0.03724467191431258
          vf_explained_var: 0.05497255176305771
          vf_loss: 0.012860845733020041
    num_agent_steps_sampled: 336000
    num_agent_steps_trained: 336000
    num_steps_sampled: 336000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,336,8841.21,336000,-2.6967,-2,-3.88,269.67


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 337000
  custom_metrics: {}
  date: 2021-11-05_14-59-47
  done: false
  episode_len_mean: 269.41
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -2.694099999999986
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 1287
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.9916207679364405e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.5110839721229341
          entropy_coeff: 0.009999999999999998
          kl: 0.016975740073080045
          policy_loss: 0.02148049386839072
          total_loss: 0.029569811291164823
          vf_explained_var: 0.23653683066368103
          vf_loss: 0.013200153907140097
    num_agent_steps_sampled: 337000
    num_agent_steps_trained: 337000
    num_steps_sampled: 337000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,337,8866.8,337000,-2.6941,-2,-3.88,269.41




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 338000
  custom_metrics: {}
  date: 2021-11-05_15-00-30
  done: false
  episode_len_mean: 268.77
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -2.687699999999986
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 1291
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.9916207679364405e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.3828925993707445
          entropy_coeff: 0.009999999999999998
          kl: 0.014869217820440718
          policy_loss: -0.09550124812457296
          total_loss: -0.08437825921509001
          vf_explained_var: 0.199815034866333
          vf_loss: 0.014951913565811183
    num_agent_steps_sampled: 338000
    num_agent_steps_trained: 338000
    num_steps_sampled: 338000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,338,8910.36,338000,-2.6877,-2,-3.88,268.77


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 339000
  custom_metrics: {}
  date: 2021-11-05_15-00-56
  done: false
  episode_len_mean: 268.96
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -2.6895999999999867
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 5
  episodes_total: 1296
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.9916207679364405e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.7189504894945357
          entropy_coeff: 0.009999999999999998
          kl: 0.03714825724166067
          policy_loss: -0.019473934008015528
          total_loss: -0.014447407093313006
          vf_explained_var: 0.43653520941734314
          vf_loss: 0.012216031706581513
    num_agent_steps_sampled: 339000
    num_agent_steps_trained: 339000
    num_steps_sampled: 339000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,339,8935.81,339000,-2.6896,-2,-3.88,268.96


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 340000
  custom_metrics: {}
  date: 2021-11-05_15-01-21
  done: false
  episode_len_mean: 269.02
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -2.690199999999986
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 1300
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.987431151904659e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.7689770056141747
          entropy_coeff: 0.009999999999999998
          kl: 0.03940462930491167
          policy_loss: -0.014501512381765577
          total_loss: -0.009347890069087346
          vf_explained_var: 0.5315033197402954
          vf_loss: 0.012843390668017996
    num_agent_steps_sampled: 340000
    num_agent_steps_trained: 340000
    num_steps_sampled: 340000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,340,8961.38,340000,-2.6902,-2,-3.88,269.02


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 341000
  custom_metrics: {}
  date: 2021-11-05_15-01-47
  done: false
  episode_len_mean: 267.92
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -2.6791999999999865
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 1304
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.981146727856991e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.6666315131717258
          entropy_coeff: 0.009999999999999998
          kl: 0.03150178535544552
          policy_loss: -0.0068370376196172505
          total_loss: 0.0031275310036208894
          vf_explained_var: 0.31489959359169006
          vf_loss: 0.01663088427028722
    num_agent_steps_sampled: 341000
    num_agent_steps_trained: 341000
    num_steps_sampled: 341000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,341,8986.45,341000,-2.6792,-2,-3.88,267.92


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 342000
  custom_metrics: {}
  date: 2021-11-05_15-02-12
  done: false
  episode_len_mean: 266.38
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -2.663799999999987
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 1308
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3471720091785483e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.8515991038746304
          entropy_coeff: 0.009999999999999998
          kl: 0.010439258981622224
          policy_loss: -0.004293743769327799
          total_loss: -0.0016615903625885645
          vf_explained_var: 0.5704030990600586
          vf_loss: 0.011148143373429776
    num_agent_steps_sampled: 342000
    num_agent_steps_trained: 342000
    num_steps_sampled: 342000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,342,9012,342000,-2.6638,-2,-3.88,266.38


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 343000
  custom_metrics: {}
  date: 2021-11-05_15-02-37
  done: false
  episode_len_mean: 266.49
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -2.664899999999987
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 3
  episodes_total: 1311
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3471720091785483e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.5624660352865856
          entropy_coeff: 0.009999999999999998
          kl: 0.03856750595435009
          policy_loss: 0.053001878493362006
          total_loss: 0.04419437812434302
          vf_explained_var: 0.4364519417285919
          vf_loss: 0.00681715756404123
    num_agent_steps_sampled: 343000
    num_agent_steps_trained: 343000
    num_steps_sampled: 343000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,343,9036.98,343000,-2.6649,-2,-3.88,266.49


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 344000
  custom_metrics: {}
  date: 2021-11-05_15-03-03
  done: false
  episode_len_mean: 265.57
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -2.6556999999999875
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 1315
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0207580137678223e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.8461472405327691
          entropy_coeff: 0.009999999999999998
          kl: 0.015616702500220627
          policy_loss: -0.013113799773984485
          total_loss: -0.011083818309836917
          vf_explained_var: 0.5923148393630981
          vf_loss: 0.01049145624662439
    num_agent_steps_sampled: 344000
    num_agent_steps_trained: 344000
    num_steps_sampled: 344000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,344,9063.05,344000,-2.6557,-2,-3.88,265.57




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 345000
  custom_metrics: {}
  date: 2021-11-05_15-03-46
  done: false
  episode_len_mean: 263.77
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -2.6376999999999877
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 5
  episodes_total: 1320
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0207580137678223e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.47207166420088875
          entropy_coeff: 0.009999999999999998
          kl: 0.018924832234370904
          policy_loss: 0.06476303910215696
          total_loss: 0.07340608657234245
          vf_explained_var: 0.4503251910209656
          vf_loss: 0.01336376356581847
    num_agent_steps_sampled: 345000
    num_agent_steps_trained: 345000
    num_steps_sampled: 345000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,345,9105.91,345000,-2.6377,-2,-3.88,263.77


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 346000
  custom_metrics: {}
  date: 2021-11-05_15-04-14
  done: false
  episode_len_mean: 262.54
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -2.625399999999987
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 1324
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0207580137678223e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.6728902267085182
          entropy_coeff: 0.009999999999999998
          kl: 0.008309117178274247
          policy_loss: 0.01895493260688252
          total_loss: 0.021281955929266082
          vf_explained_var: 0.6318545341491699
          vf_loss: 0.009055925771180126
    num_agent_steps_sampled: 346000
    num_agent_steps_trained: 346000
    num_steps_sampled: 346000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,346,9134.1,346000,-2.6254,-2,-3.88,262.54


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 347000
  custom_metrics: {}
  date: 2021-11-05_15-04-41
  done: false
  episode_len_mean: 259.89
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -2.5988999999999884
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 1328
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0207580137678223e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.6868873642550575
          entropy_coeff: 0.009999999999999998
          kl: 0.027683597252952948
          policy_loss: 0.03250078815552923
          total_loss: 0.03906974138485061
          vf_explained_var: 0.3107500374317169
          vf_loss: 0.013437826827996307
    num_agent_steps_sampled: 347000
    num_agent_steps_trained: 347000
    num_steps_sampled: 347000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,347,9160.96,347000,-2.5989,-2,-3.88,259.89


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 348000
  custom_metrics: {}
  date: 2021-11-05_15-05-09
  done: false
  episode_len_mean: 256.73
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -2.567299999999989
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 4
  episodes_total: 1332
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.0311370206517327e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.4921191304922104
          entropy_coeff: 0.009999999999999998
          kl: 0.005378021487982013
          policy_loss: -0.026699970869554415
          total_loss: -0.01986019354727533
          vf_explained_var: 0.3810824155807495
          vf_loss: 0.011760968352771468
    num_agent_steps_sampled: 348000
    num_agent_steps_trained: 348000
    num_steps_sampled: 348000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,348,9188.52,348000,-2.5673,-2,-3.88,256.73


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 349000
  custom_metrics: {}
  date: 2021-11-05_15-05-36
  done: false
  episode_len_mean: 251.95
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -2.51949999999999
  episode_reward_min: -3.8799999999999613
  episodes_this_iter: 5
  episodes_total: 1337
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.0311370206517327e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.5275368862681918
          entropy_coeff: 0.009999999999999998
          kl: 0.008823741238996355
          policy_loss: -0.00857888265616364
          total_loss: 0.001038418710231781
          vf_explained_var: 0.28795668482780457
          vf_loss: 0.014892667511271105
    num_agent_steps_sampled: 349000
    num_agent_steps_trained: 349000
    num_steps_sampled: 349000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,349,9215.55,349000,-2.5195,-2,-3.88,251.95


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 350000
  custom_metrics: {}
  date: 2021-11-05_15-06-02
  done: false
  episode_len_mean: 248.41
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -2.4840999999999913
  episode_reward_min: -3.6899999999999653
  episodes_this_iter: 4
  episodes_total: 1341
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.0311370206517327e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.6403732077942954
          entropy_coeff: 0.009999999999999998
          kl: 0.7385389604635884
          policy_loss: 0.04146170218785604
          total_loss: 0.054630768216318557
          vf_explained_var: 0.24314825236797333
          vf_loss: 0.019572799497594436
    num_agent_steps_sampled: 350000
    num_agent_steps_trained: 350000
    num_steps_sampled: 350000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,350,9241.76,350000,-2.4841,-2,-3.69,248.41


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 351000
  custom_metrics: {}
  date: 2021-11-05_15-06-27
  done: false
  episode_len_mean: 245.55
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -2.455499999999992
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 1345
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.5467055309776e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.8647826890150706
          entropy_coeff: 0.009999999999999998
          kl: 0.020819786016268122
          policy_loss: 0.04342195433047083
          total_loss: 0.05141278273529477
          vf_explained_var: 0.19385288655757904
          vf_loss: 0.016638659158100685
    num_agent_steps_sampled: 351000
    num_agent_steps_trained: 351000
    num_steps_sampled: 351000
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,351,9266.38,351000,-2.4555,-2,-3.4,245.55


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 352000
  custom_metrics: {}
  date: 2021-11-05_15-06-53
  done: false
  episode_len_mean: 242.83
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -2.4282999999999926
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 1349
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.8200582964664e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.6846796207957797
          entropy_coeff: 0.009999999999999998
          kl: 0.005435768102166902
          policy_loss: 0.021289596292707654
          total_loss: 0.028468956467178133
          vf_explained_var: 0.24234500527381897
          vf_loss: 0.014026155778103405
    num_agent_steps_sampled: 352000
    num_agent_steps_trained: 352000
    num_steps_sampled: 352000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,352,9292.22,352000,-2.4283,-2,-3.4,242.83




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 353000
  custom_metrics: {}
  date: 2021-11-05_15-07-37
  done: false
  episode_len_mean: 242.06
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -2.420599999999992
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 1353
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.8200582964664e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.6532117519113753
          entropy_coeff: 0.009999999999999998
          kl: 0.009648993755642246
          policy_loss: -0.03374505312078529
          total_loss: -0.027233215421438216
          vf_explained_var: 0.24106448888778687
          vf_loss: 0.013043951460470755
    num_agent_steps_sampled: 353000
    num_agent_steps_trained: 353000
    num_steps_sampled: 353000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,353,9336.19,353000,-2.4206,-2,-3.4,242.06


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 354000
  custom_metrics: {}
  date: 2021-11-05_15-08-02
  done: false
  episode_len_mean: 241.41
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -2.4140999999999924
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 1357
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.8200582964664e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.51677618390984
          entropy_coeff: 0.009999999999999998
          kl: 0.004669754817631618
          policy_loss: -0.11090020222796335
          total_loss: -0.09913874218861261
          vf_explained_var: 0.16827180981636047
          vf_loss: 0.016929224164535604
    num_agent_steps_sampled: 354000
    num_agent_steps_trained: 354000
    num_steps_sampled: 354000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,354,9362.01,354000,-2.4141,-2,-3.4,241.41


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 355000
  custom_metrics: {}
  date: 2021-11-05_15-08-28
  done: false
  episode_len_mean: 241.84
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.418399999999992
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 5
  episodes_total: 1362
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4100291482332e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.574217736058765
          entropy_coeff: 0.009999999999999998
          kl: 0.0037929084077063637
          policy_loss: 0.009431092441082001
          total_loss: 0.01813378483057022
          vf_explained_var: 0.15910111367702484
          vf_loss: 0.014444871867696444
    num_agent_steps_sampled: 355000
    num_agent_steps_trained: 355000
    num_steps_sampled: 355000
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,355,9387.82,355000,-2.4184,-2.1,-3.4,241.84


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 356000
  custom_metrics: {}
  date: 2021-11-05_15-08-55
  done: false
  episode_len_mean: 241.23
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.4122999999999926
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 1366
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7050145741166e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.4516955895556344
          entropy_coeff: 0.009999999999999998
          kl: 0.00318401746716914
          policy_loss: 0.013945897751384311
          total_loss: 0.02359591474135717
          vf_explained_var: 0.05392911657691002
          vf_loss: 0.014166973251849412
    num_agent_steps_sampled: 356000
    num_agent_steps_trained: 356000
    num_steps_sampled: 356000
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,356,9414.43,356000,-2.4123,-2.1,-3.4,241.23


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 357000
  custom_metrics: {}
  date: 2021-11-05_15-09-21
  done: false
  episode_len_mean: 240.27
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.4026999999999923
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 1370
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.525072870583e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.40605338480737474
          entropy_coeff: 0.009999999999999998
          kl: 0.003441723127477303
          policy_loss: -0.010249734256002637
          total_loss: -0.00047595898310343423
          vf_explained_var: 0.08350706100463867
          vf_loss: 0.013834309836642609
    num_agent_steps_sampled: 357000
    num_agent_steps_trained: 357000
    num_steps_sampled: 357000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,357,9440.52,357000,-2.4027,-2.1,-3.4,240.27


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 358000
  custom_metrics: {}
  date: 2021-11-05_15-09-47
  done: false
  episode_len_mean: 239.99
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.3998999999999926
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 5
  episodes_total: 1375
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.2625364352915e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.435470488998625
          entropy_coeff: 0.009999999999999998
          kl: 0.006904822606240815
          policy_loss: -0.0057753150661786394
          total_loss: 0.0067693430516454905
          vf_explained_var: 0.12869027256965637
          vf_loss: 0.016899361947758332
    num_agent_steps_sampled: 358000
    num_agent_steps_trained: 358000
    num_steps_sampled: 358000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,358,9466.88,358000,-2.3999,-2.1,-3.4,239.99


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 359000
  custom_metrics: {}
  date: 2021-11-05_15-10-14
  done: false
  episode_len_mean: 239.48
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.3947999999999925
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 1379
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.2625364352915e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.48751071559058295
          entropy_coeff: 0.009999999999999998
          kl: 0.005710192887386256
          policy_loss: 0.018108618507782618
          total_loss: 0.026821171657906637
          vf_explained_var: 0.08112378418445587
          vf_loss: 0.013587658169368903
    num_agent_steps_sampled: 359000
    num_agent_steps_trained: 359000
    num_steps_sampled: 359000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,359,9493.42,359000,-2.3948,-2.1,-3.4,239.48




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 360000
  custom_metrics: {}
  date: 2021-11-05_15-10-59
  done: false
  episode_len_mean: 239.34
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.3933999999999926
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 1383
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.2625364352915e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.5931679467360179
          entropy_coeff: 0.009999999999999998
          kl: 0.0037121022456391177
          policy_loss: -0.007537512315644158
          total_loss: 0.0007970160908169217
          vf_explained_var: 0.09075894951820374
          vf_loss: 0.014266208238485787
    num_agent_steps_sampled: 360000
    num_agent_steps_trained: 360000
    num_steps_sampled: 360000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,360,9538.81,360000,-2.3934,-2.04,-3.4,239.34


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 361000
  custom_metrics: {}
  date: 2021-11-05_15-11-26
  done: false
  episode_len_mean: 238.88
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.3887999999999927
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 5
  episodes_total: 1388
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.13126821764575e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.7277587691942851
          entropy_coeff: 0.009999999999999998
          kl: 0.010051243193880862
          policy_loss: -0.022461921556128396
          total_loss: -0.014514097405804529
          vf_explained_var: 0.24910715222358704
          vf_loss: 0.015225414600637224
    num_agent_steps_sampled: 361000
    num_agent_steps_trained: 361000
    num_steps_sampled: 361000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,361,9565.54,361000,-2.3888,-2.04,-3.4,238.88


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 362000
  custom_metrics: {}
  date: 2021-11-05_15-11-53
  done: false
  episode_len_mean: 238.82
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.3881999999999923
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 1392
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.13126821764575e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.6827774273024665
          entropy_coeff: 0.009999999999999998
          kl: 0.0059328326295352935
          policy_loss: 0.018776005340947047
          total_loss: 0.023630424257781772
          vf_explained_var: 0.2757100760936737
          vf_loss: 0.011682192515581846
    num_agent_steps_sampled: 362000
    num_agent_steps_trained: 362000
    num_steps_sampled: 362000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,362,9592.64,362000,-2.3882,-2.04,-3.4,238.82


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 363000
  custom_metrics: {}
  date: 2021-11-05_15-12-20
  done: false
  episode_len_mean: 238.23
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.3822999999999928
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 1396
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.13126821764575e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.5518294950326283
          entropy_coeff: 0.009999999999999998
          kl: 0.005095114583051618
          policy_loss: -0.00025796608792410955
          total_loss: 0.005801737060149511
          vf_explained_var: 0.2520805299282074
          vf_loss: 0.011577995959669352
    num_agent_steps_sampled: 363000
    num_agent_steps_trained: 363000
    num_steps_sampled: 363000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,363,9619.2,363000,-2.3823,-2.04,-3.4,238.23


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 364000
  custom_metrics: {}
  date: 2021-11-05_15-12-47
  done: false
  episode_len_mean: 237.85
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.3784999999999927
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 5
  episodes_total: 1401
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.13126821764575e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.5641773415936364
          entropy_coeff: 0.009999999999999998
          kl: 0.0040762928732681935
          policy_loss: -0.028511625528335572
          total_loss: -0.016967807710170747
          vf_explained_var: 0.08499600738286972
          vf_loss: 0.017185591482039957
    num_agent_steps_sampled: 364000
    num_agent_steps_trained: 364000
    num_steps_sampled: 364000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,364,9646.11,364000,-2.3785,-2.04,-3.4,237.85


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 365000
  custom_metrics: {}
  date: 2021-11-05_15-13-13
  done: false
  episode_len_mean: 236.72
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.3671999999999933
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 1405
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.065634108822875e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.5262362354331547
          entropy_coeff: 0.009999999999999998
          kl: 0.004820346981595157
          policy_loss: 0.02620316685901748
          total_loss: 0.03383775187863244
          vf_explained_var: 0.0938301831483841
          vf_loss: 0.012896943495919307
    num_agent_steps_sampled: 365000
    num_agent_steps_trained: 365000
    num_steps_sampled: 365000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,365,9672.72,365000,-2.3672,-2.04,-3.4,236.72


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 366000
  custom_metrics: {}
  date: 2021-11-05_15-13-40
  done: false
  episode_len_mean: 236.23
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.3622999999999936
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 1409
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.328170544114375e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.49571665392981634
          entropy_coeff: 0.009999999999999998
          kl: 0.004245947956195551
          policy_loss: 0.00272593821088473
          total_loss: 0.012125968519184325
          vf_explained_var: 0.05041394755244255
          vf_loss: 0.014357195494489538
    num_agent_steps_sampled: 366000
    num_agent_steps_trained: 366000
    num_steps_sampled: 366000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,366,9699.46,366000,-2.3623,-2.04,-3.4,236.23




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 367000
  custom_metrics: {}
  date: 2021-11-05_15-14-25
  done: false
  episode_len_mean: 234.04
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.340399999999994
  episode_reward_min: -2.709999999999986
  episodes_this_iter: 5
  episodes_total: 1414
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6640852720571875e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.5851769238710404
          entropy_coeff: 0.009999999999999998
          kl: 0.007146414447858263
          policy_loss: -0.018651906649271646
          total_loss: -0.007486062662469016
          vf_explained_var: 0.18680767714977264
          vf_loss: 0.017017617118027475
    num_agent_steps_sampled: 367000
    num_agent_steps_trained: 367000
    num_steps_sampled: 367000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,367,9744.51,367000,-2.3404,-2.04,-2.71,234.04


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 368000
  custom_metrics: {}
  date: 2021-11-05_15-14-52
  done: false
  episode_len_mean: 233.82
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.3381999999999934
  episode_reward_min: -2.709999999999986
  episodes_this_iter: 4
  episodes_total: 1418
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6640852720571875e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.6101617170704736
          entropy_coeff: 0.009999999999999998
          kl: 0.0039820247612757
          policy_loss: 0.04172614432043499
          total_loss: 0.04842495885160234
          vf_explained_var: 0.05534099414944649
          vf_loss: 0.012800429285400443
    num_agent_steps_sampled: 368000
    num_agent_steps_trained: 368000
    num_steps_sampled: 368000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,368,9770.76,368000,-2.3382,-2.04,-2.71,233.82


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 369000
  custom_metrics: {}
  date: 2021-11-05_15-15-18
  done: false
  episode_len_mean: 233.88
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.3387999999999938
  episode_reward_min: -2.709999999999986
  episodes_this_iter: 4
  episodes_total: 1422
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3320426360285938e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.6249575701024798
          entropy_coeff: 0.009999999999999998
          kl: 0.0050811180851721045
          policy_loss: 0.04069348979327414
          total_loss: 0.04678239640262392
          vf_explained_var: 0.15463459491729736
          vf_loss: 0.012338482702357902
    num_agent_steps_sampled: 369000
    num_agent_steps_trained: 369000
    num_steps_sampled: 369000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,369,9797.3,369000,-2.3388,-2.04,-2.71,233.88


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 370000
  custom_metrics: {}
  date: 2021-11-05_15-15-45
  done: false
  episode_len_mean: 233.92
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.3391999999999937
  episode_reward_min: -2.709999999999986
  episodes_this_iter: 4
  episodes_total: 1426
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3320426360285938e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.627195264895757
          entropy_coeff: 0.009999999999999998
          kl: 0.004935007787584035
          policy_loss: -0.03243587882154518
          total_loss: -0.024932772459255324
          vf_explained_var: 0.16259124875068665
          vf_loss: 0.01377505792511834
    num_agent_steps_sampled: 370000
    num_agent_steps_trained: 370000
    num_steps_sampled: 370000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,370,9823.62,370000,-2.3392,-2.04,-2.71,233.92


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 371000
  custom_metrics: {}
  date: 2021-11-05_15-16-10
  done: false
  episode_len_mean: 234.27
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.3426999999999936
  episode_reward_min: -2.709999999999986
  episodes_this_iter: 5
  episodes_total: 1431
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.660213180142969e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.7330943379137251
          entropy_coeff: 0.009999999999999998
          kl: 0.0068424444003618926
          policy_loss: -0.002354405903153949
          total_loss: 0.007415209379461076
          vf_explained_var: 0.22153541445732117
          vf_loss: 0.017100558678309122
    num_agent_steps_sampled: 371000
    num_agent_steps_trained: 371000
    num_steps_sampled: 371000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,371,9849.14,371000,-2.3427,-2.04,-2.71,234.27


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 372000
  custom_metrics: {}
  date: 2021-11-05_15-16-36
  done: false
  episode_len_mean: 234.97
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.3496999999999932
  episode_reward_min: -2.709999999999986
  episodes_this_iter: 4
  episodes_total: 1435
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.660213180142969e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.8187313079833984
          entropy_coeff: 0.009999999999999998
          kl: 0.007718613268554009
          policy_loss: -0.01069841343495581
          total_loss: -0.005752125216854943
          vf_explained_var: 0.1988757848739624
          vf_loss: 0.013133600074797868
    num_agent_steps_sampled: 372000
    num_agent_steps_trained: 372000
    num_steps_sampled: 372000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,372,9874.56,372000,-2.3497,-2.04,-2.71,234.97


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 373000
  custom_metrics: {}
  date: 2021-11-05_15-17-02
  done: false
  episode_len_mean: 235.01
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.3500999999999936
  episode_reward_min: -2.709999999999986
  episodes_this_iter: 4
  episodes_total: 1439
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.660213180142969e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.6537334654066298
          entropy_coeff: 0.009999999999999998
          kl: 0.005704788373751373
          policy_loss: 0.002001660896672143
          total_loss: 0.007673255271381802
          vf_explained_var: 0.20513492822647095
          vf_loss: 0.012208924628794193
    num_agent_steps_sampled: 373000
    num_agent_steps_trained: 373000
    num_steps_sampled: 373000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,373,9900.55,373000,-2.3501,-2.04,-2.71,235.01




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 374000
  custom_metrics: {}
  date: 2021-11-05_15-17-46
  done: false
  episode_len_mean: 234.88
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.3487999999999936
  episode_reward_min: -2.709999999999986
  episodes_this_iter: 4
  episodes_total: 1443
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.660213180142969e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.7146777007314894
          entropy_coeff: 0.009999999999999998
          kl: 0.008615878977376843
          policy_loss: 0.03558969580464893
          total_loss: 0.039612369818819895
          vf_explained_var: 0.19223013520240784
          vf_loss: 0.011169449374493625
    num_agent_steps_sampled: 374000
    num_agent_steps_trained: 374000
    num_steps_sampled: 374000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,374,9944.58,374000,-2.3488,-2.04,-2.71,234.88


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 375000
  custom_metrics: {}
  date: 2021-11-05_15-18-12
  done: false
  episode_len_mean: 234.13
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.3412999999999937
  episode_reward_min: -2.5499999999999896
  episodes_this_iter: 4
  episodes_total: 1447
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.660213180142969e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.6160941945181952
          entropy_coeff: 0.009999999999999998
          kl: 0.005939396441574216
          policy_loss: -0.0029404696491029526
          total_loss: 0.00313136519657241
          vf_explained_var: 0.1922985017299652
          vf_loss: 0.01223277503417598
    num_agent_steps_sampled: 375000
    num_agent_steps_trained: 375000
    num_steps_sampled: 375000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,375,9970.71,375000,-2.3413,-2.04,-2.55,234.13


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 376000
  custom_metrics: {}
  date: 2021-11-05_15-18-38
  done: false
  episode_len_mean: 233.9
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.3389999999999938
  episode_reward_min: -2.5499999999999896
  episodes_this_iter: 5
  episodes_total: 1452
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.660213180142969e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.6278581566280789
          entropy_coeff: 0.009999999999999998
          kl: 0.0051098717106692485
          policy_loss: 0.004284011613991525
          total_loss: 0.01273055010371738
          vf_explained_var: 0.2582985460758209
          vf_loss: 0.014725119941350487
    num_agent_steps_sampled: 376000
    num_agent_steps_trained: 376000
    num_steps_sampled: 376000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,376,9996.99,376000,-2.339,-2.04,-2.55,233.9


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 377000
  custom_metrics: {}
  date: 2021-11-05_15-19-04
  done: false
  episode_len_mean: 233.84
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.3383999999999943
  episode_reward_min: -2.5499999999999896
  episodes_this_iter: 4
  episodes_total: 1456
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.660213180142969e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.6403337571356031
          entropy_coeff: 0.009999999999999998
          kl: 0.004836941693941318
          policy_loss: 0.025095079839229584
          total_loss: 0.03072982620861795
          vf_explained_var: 0.2201279252767563
          vf_loss: 0.012038085433757967
    num_agent_steps_sampled: 377000
    num_agent_steps_trained: 377000
    num_steps_sampled: 377000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,377,10023,377000,-2.3384,-2.04,-2.55,233.84


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 378000
  custom_metrics: {}
  date: 2021-11-05_15-19-30
  done: false
  episode_len_mean: 233.75
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.337499999999994
  episode_reward_min: -2.5499999999999896
  episodes_this_iter: 4
  episodes_total: 1460
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3301065900714844e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.5783601002560721
          entropy_coeff: 0.009999999999999998
          kl: 0.006448701326180147
          policy_loss: 0.0482485750483142
          total_loss: 0.05318429983324475
          vf_explained_var: 0.3141166865825653
          vf_loss: 0.010719324700120422
    num_agent_steps_sampled: 378000
    num_agent_steps_trained: 378000
    num_steps_sampled: 378000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,378,10049.4,378000,-2.3375,-2.04,-2.55,233.75


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 379000
  custom_metrics: {}
  date: 2021-11-05_15-19-57
  done: false
  episode_len_mean: 233.59
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.335899999999994
  episode_reward_min: -2.5499999999999896
  episodes_this_iter: 4
  episodes_total: 1464
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3301065900714844e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.4826726926697625
          entropy_coeff: 0.009999999999999998
          kl: 0.00747579575587412
          policy_loss: -0.04580030027363036
          total_loss: -0.040959516498777604
          vf_explained_var: 0.41487744450569153
          vf_loss: 0.009667509887367486
    num_agent_steps_sampled: 379000
    num_agent_steps_trained: 379000
    num_steps_sampled: 379000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,379,10075.8,379000,-2.3359,-2.04,-2.55,233.59


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 380000
  custom_metrics: {}
  date: 2021-11-05_15-20-24
  done: false
  episode_len_mean: 233.37
  episode_media: {}
  episode_reward_max: -2.0400000000000005
  episode_reward_mean: -2.333699999999994
  episode_reward_min: -2.5499999999999896
  episodes_this_iter: 5
  episodes_total: 1469
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3301065900714844e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.5083462241623137
          entropy_coeff: 0.009999999999999998
          kl: 0.005668664158493186
          policy_loss: 0.009440910981761084
          total_loss: 0.019883755346139273
          vf_explained_var: 0.10694754868745804
          vf_loss: 0.01552630580133862
    num_agent_steps_sampled: 380000
    num_agent_steps_trained: 380000
    num_steps_sampled: 380000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,380,10102.3,380000,-2.3337,-2.04,-2.55,233.37




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 381000
  custom_metrics: {}
  date: 2021-11-05_15-21-08
  done: false
  episode_len_mean: 233.05
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.330499999999994
  episode_reward_min: -2.5499999999999896
  episodes_this_iter: 4
  episodes_total: 1473
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3301065900714844e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.48736248513062796
          entropy_coeff: 0.009999999999999998
          kl: 0.010182197388135745
          policy_loss: 0.003420955604977078
          total_loss: 0.007642424768871731
          vf_explained_var: 0.3429071307182312
          vf_loss: 0.009095095232543019
    num_agent_steps_sampled: 381000
    num_agent_steps_trained: 381000
    num_steps_sampled: 381000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,381,10146.5,381000,-2.3305,-1.98,-2.55,233.05


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 382000
  custom_metrics: {}
  date: 2021-11-05_15-21-34
  done: false
  episode_len_mean: 233.06
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.3305999999999942
  episode_reward_min: -2.5499999999999896
  episodes_this_iter: 5
  episodes_total: 1478
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3301065900714844e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.3819941133260727
          entropy_coeff: 0.009999999999999998
          kl: 0.016631903664932956
          policy_loss: -0.0494696910182635
          total_loss: -0.04040662190980381
          vf_explained_var: 0.1779443472623825
          vf_loss: 0.012883011137859689
    num_agent_steps_sampled: 382000
    num_agent_steps_trained: 382000
    num_steps_sampled: 382000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,382,10173,382000,-2.3306,-1.98,-2.55,233.06


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 383000
  custom_metrics: {}
  date: 2021-11-05_15-22-01
  done: false
  episode_len_mean: 233.13
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.331299999999994
  episode_reward_min: -2.5499999999999896
  episodes_this_iter: 4
  episodes_total: 1482
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3301065900714844e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.6800899250639809
          entropy_coeff: 0.009999999999999998
          kl: 0.04482265799054035
          policy_loss: 0.01504322662949562
          total_loss: 0.02082842058605618
          vf_explained_var: 0.037746019661426544
          vf_loss: 0.012586088923530445
    num_agent_steps_sampled: 383000
    num_agent_steps_trained: 383000
    num_steps_sampled: 383000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,383,10199.8,383000,-2.3313,-1.98,-2.55,233.13


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 384000
  custom_metrics: {}
  date: 2021-11-05_15-22-26
  done: false
  episode_len_mean: 233.67
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.336699999999994
  episode_reward_min: -2.5899999999999888
  episodes_this_iter: 4
  episodes_total: 1486
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.995159885107228e-15
          cur_lr: 5.000000000000001e-05
          entropy: 1.1465286387337579
          entropy_coeff: 0.009999999999999998
          kl: 0.02253467607659508
          policy_loss: 0.029121080537637074
          total_loss: 0.02932839675082101
          vf_explained_var: 0.2461554855108261
          vf_loss: 0.01167260424958335
    num_agent_steps_sampled: 384000
    num_agent_steps_trained: 384000
    num_steps_sampled: 384000
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,384,10224.5,384000,-2.3367,-1.98,-2.59,233.67


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 385000
  custom_metrics: {}
  date: 2021-11-05_15-22-50
  done: false
  episode_len_mean: 234.75
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.3474999999999935
  episode_reward_min: -2.809999999999984
  episodes_this_iter: 4
  episodes_total: 1490
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.49273982766084e-15
          cur_lr: 5.000000000000001e-05
          entropy: 1.4375544826189677
          entropy_coeff: 0.009999999999999998
          kl: 0.024718456694386107
          policy_loss: 0.03880051912532912
          total_loss: 0.03530977815389633
          vf_explained_var: 0.22954629361629486
          vf_loss: 0.0108848057480322
    num_agent_steps_sampled: 385000
    num_agent_steps_trained: 385000
    num_steps_sampled: 385000
    num_steps_trained: 38

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,385,10248.5,385000,-2.3475,-1.98,-2.81,234.75


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 386000
  custom_metrics: {}
  date: 2021-11-05_15-23-14
  done: false
  episode_len_mean: 235.92
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.3591999999999937
  episode_reward_min: -2.809999999999984
  episodes_this_iter: 4
  episodes_total: 1494
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1239109741491263e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.4966616034507751
          entropy_coeff: 0.009999999999999998
          kl: 0.011416342892835019
          policy_loss: 0.030775790247652264
          total_loss: 0.026445260147253673
          vf_explained_var: 0.1660664975643158
          vf_loss: 0.010636084475037125
    num_agent_steps_sampled: 386000
    num_agent_steps_trained: 386000
    num_steps_sampled: 386000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,386,10273,386000,-2.3592,-1.98,-2.81,235.92


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 387000
  custom_metrics: {}
  date: 2021-11-05_15-23-37
  done: false
  episode_len_mean: 237.31
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.3730999999999933
  episode_reward_min: -3.2999999999999736
  episodes_this_iter: 3
  episodes_total: 1497
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1239109741491263e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.7196992105907865
          entropy_coeff: 0.009999999999999998
          kl: 0.08968802949298159
          policy_loss: 0.01589473229315546
          total_loss: 0.007614340633153915
          vf_explained_var: 0.1664097011089325
          vf_loss: 0.008916600020084944
    num_agent_steps_sampled: 387000
    num_agent_steps_trained: 387000
    num_steps_sampled: 387000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,387,10296.1,387000,-2.3731,-1.98,-3.3,237.31




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 388000
  custom_metrics: {}
  date: 2021-11-05_15-24-19
  done: false
  episode_len_mean: 238.45
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.384499999999993
  episode_reward_min: -3.2999999999999736
  episodes_this_iter: 4
  episodes_total: 1501
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6858664612236893e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.5443590958913167
          entropy_coeff: 0.009999999999999998
          kl: 0.009288501619768949
          policy_loss: 0.0009169795446925693
          total_loss: -0.002938539120886061
          vf_explained_var: 0.15630465745925903
          vf_loss: 0.011588070696840683
    num_agent_steps_sampled: 388000
    num_agent_steps_trained: 388000
    num_steps_sampled: 388000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,388,10337.4,388000,-2.3845,-1.98,-3.3,238.45


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 389000
  custom_metrics: {}
  date: 2021-11-05_15-24-42
  done: false
  episode_len_mean: 240.14
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.401399999999993
  episode_reward_min: -3.2999999999999736
  episodes_this_iter: 4
  episodes_total: 1505
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6858664612236893e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.5956531047821045
          entropy_coeff: 0.009999999999999998
          kl: 0.011365939636691478
          policy_loss: 0.018230595688025156
          total_loss: 0.015161917938126459
          vf_explained_var: 0.07891391217708588
          vf_loss: 0.012887851583460968
    num_agent_steps_sampled: 389000
    num_agent_steps_trained: 389000
    num_steps_sampled: 389000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,389,10360.2,389000,-2.4014,-1.98,-3.3,240.14


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 390000
  custom_metrics: {}
  date: 2021-11-05_15-25-05
  done: false
  episode_len_mean: 241.66
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.4165999999999923
  episode_reward_min: -3.2999999999999736
  episodes_this_iter: 4
  episodes_total: 1509
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6858664612236893e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.588104134135776
          entropy_coeff: 0.009999999999999998
          kl: 0.00989895066640069
          policy_loss: -0.016551598782340686
          total_loss: -0.019388786289427015
          vf_explained_var: 0.0963413268327713
          vf_loss: 0.013043851260509757
    num_agent_steps_sampled: 390000
    num_agent_steps_trained: 390000
    num_steps_sampled: 390000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,390,10383.8,390000,-2.4166,-1.98,-3.3,241.66


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 391000
  custom_metrics: {}
  date: 2021-11-05_15-25-29
  done: false
  episode_len_mean: 242.78
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.427799999999992
  episode_reward_min: -3.2999999999999736
  episodes_this_iter: 3
  episodes_total: 1512
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6858664612236893e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.405977902147505
          entropy_coeff: 0.009999999999999998
          kl: 0.013032390078429677
          policy_loss: -0.012562359538343218
          total_loss: -0.017182612750265332
          vf_explained_var: 0.16834761202335358
          vf_loss: 0.00943952682428062
    num_agent_steps_sampled: 391000
    num_agent_steps_trained: 391000
    num_steps_sampled: 391000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,391,10407.4,391000,-2.4278,-1.98,-3.3,242.78


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 392000
  custom_metrics: {}
  date: 2021-11-05_15-25-53
  done: false
  episode_len_mean: 243.62
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.436199999999992
  episode_reward_min: -3.2999999999999736
  episodes_this_iter: 4
  episodes_total: 1516
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6858664612236893e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.4395586967468261
          entropy_coeff: 0.009999999999999998
          kl: 0.030742793456441653
          policy_loss: -0.09384968976179758
          total_loss: -0.09681834297047721
          vf_explained_var: 0.27529945969581604
          vf_loss: 0.011426930176094174
    num_agent_steps_sampled: 392000
    num_agent_steps_trained: 392000
    num_steps_sampled: 392000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,392,10431.7,392000,-2.4362,-1.98,-3.3,243.62


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 393000
  custom_metrics: {}
  date: 2021-11-05_15-26-18
  done: false
  episode_len_mean: 244.29
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.442899999999992
  episode_reward_min: -3.2999999999999736
  episodes_this_iter: 4
  episodes_total: 1520
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5287996918355338e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.1472682846917046
          entropy_coeff: 0.009999999999999998
          kl: 0.018290029743088118
          policy_loss: -0.07563799942533175
          total_loss: -0.07621092374126116
          vf_explained_var: 0.20866312086582184
          vf_loss: 0.010899758670065138
    num_agent_steps_sampled: 393000
    num_agent_steps_trained: 393000
    num_steps_sampled: 393000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,393,10456.5,393000,-2.4429,-1.98,-3.3,244.29


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 394000
  custom_metrics: {}
  date: 2021-11-05_15-26-44
  done: false
  episode_len_mean: 244.7
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.4469999999999916
  episode_reward_min: -3.2999999999999736
  episodes_this_iter: 4
  episodes_total: 1524
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5287996918355338e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.0275666495164235
          entropy_coeff: 0.009999999999999998
          kl: 0.008765944315245102
          policy_loss: -0.04696683726376957
          total_loss: -0.047566772624850276
          vf_explained_var: 0.30120977759361267
          vf_loss: 0.009675732027325365
    num_agent_steps_sampled: 394000
    num_agent_steps_trained: 394000
    num_steps_sampled: 394000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,394,10481.9,394000,-2.447,-1.98,-3.3,244.7


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 395000
  custom_metrics: {}
  date: 2021-11-05_15-27-09
  done: false
  episode_len_mean: 244.7
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.446999999999992
  episode_reward_min: -3.2999999999999736
  episodes_this_iter: 5
  episodes_total: 1529
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5287996918355338e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.1391083406077491
          entropy_coeff: 0.009999999999999998
          kl: 0.010464265212315303
          policy_loss: -0.015221044503980213
          total_loss: -0.013795430047644509
          vf_explained_var: 0.2715610861778259
          vf_loss: 0.012816699076857832
    num_agent_steps_sampled: 395000
    num_agent_steps_trained: 395000
    num_steps_sampled: 395000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,395,10507.3,395000,-2.447,-1.98,-3.3,244.7




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 396000
  custom_metrics: {}
  date: 2021-11-05_15-27-52
  done: false
  episode_len_mean: 244.19
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.441899999999992
  episode_reward_min: -3.2999999999999736
  episodes_this_iter: 4
  episodes_total: 1533
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5287996918355338e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.1405529671245151
          entropy_coeff: 0.009999999999999998
          kl: 0.008257328539680723
          policy_loss: 0.02279690975944201
          total_loss: 0.02098065556751357
          vf_explained_var: 0.2190903127193451
          vf_loss: 0.00958927565564712
    num_agent_steps_sampled: 396000
    num_agent_steps_trained: 396000
    num_steps_sampled: 396000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,396,10550,396000,-2.4419,-1.98,-3.3,244.19


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 397000
  custom_metrics: {}
  date: 2021-11-05_15-28-17
  done: false
  episode_len_mean: 244.49
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.4448999999999916
  episode_reward_min: -3.2999999999999736
  episodes_this_iter: 4
  episodes_total: 1537
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5287996918355338e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.155055336157481
          entropy_coeff: 0.009999999999999998
          kl: 0.008105125855294817
          policy_loss: 0.03304167501628399
          total_loss: 0.030331518459651204
          vf_explained_var: 0.18960821628570557
          vf_loss: 0.008840397590150435
    num_agent_steps_sampled: 397000
    num_agent_steps_trained: 397000
    num_steps_sampled: 397000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,397,10575.2,397000,-2.4449,-1.98,-3.3,244.49


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 398000
  custom_metrics: {}
  date: 2021-11-05_15-28-42
  done: false
  episode_len_mean: 244.87
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.448699999999991
  episode_reward_min: -3.2999999999999736
  episodes_this_iter: 4
  episodes_total: 1541
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5287996918355338e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.0352643887201944
          entropy_coeff: 0.009999999999999998
          kl: 0.0061719211185497615
          policy_loss: 0.021611810310019387
          total_loss: 0.022170174618562064
          vf_explained_var: 0.10219739377498627
          vf_loss: 0.010911006625327799
    num_agent_steps_sampled: 398000
    num_agent_steps_trained: 398000
    num_steps_sampled: 398000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,398,10600.5,398000,-2.4487,-1.98,-3.3,244.87


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 399000
  custom_metrics: {}
  date: 2021-11-05_15-29-07
  done: false
  episode_len_mean: 245.12
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.4511999999999916
  episode_reward_min: -3.2999999999999736
  episodes_this_iter: 4
  episodes_total: 1545
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5287996918355338e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.2007516874207391
          entropy_coeff: 0.009999999999999998
          kl: 0.006822475032379514
          policy_loss: 0.011469942000177171
          total_loss: 0.011150446203019884
          vf_explained_var: 0.08260497450828552
          vf_loss: 0.011688021653228336
    num_agent_steps_sampled: 399000
    num_agent_steps_trained: 399000
    num_steps_sampled: 399000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,399,10625.1,399000,-2.4512,-1.98,-3.3,245.12


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 400000
  custom_metrics: {}
  date: 2021-11-05_15-29-31
  done: false
  episode_len_mean: 245.92
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.4591999999999916
  episode_reward_min: -3.2999999999999736
  episodes_this_iter: 4
  episodes_total: 1549
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5287996918355338e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.1655896319283379
          entropy_coeff: 0.009999999999999998
          kl: 0.011816301021899752
          policy_loss: 0.030018858777152168
          total_loss: 0.028634724269310633
          vf_explained_var: 0.08469702303409576
          vf_loss: 0.010271762756423818
    num_agent_steps_sampled: 400000
    num_agent_steps_trained: 400000
    num_steps_sampled: 400000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,400,10649.6,400000,-2.4592,-1.98,-3.3,245.92


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 401000
  custom_metrics: {}
  date: 2021-11-05_15-29-56
  done: false
  episode_len_mean: 246.5
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.4649999999999914
  episode_reward_min: -3.2999999999999736
  episodes_this_iter: 4
  episodes_total: 1553
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5287996918355338e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.1041524330774943
          entropy_coeff: 0.009999999999999998
          kl: 0.007925163175981728
          policy_loss: 0.008819419559505251
          total_loss: 0.01056817149122556
          vf_explained_var: 0.08217009902000427
          vf_loss: 0.01279027315063609
    num_agent_steps_sampled: 401000
    num_agent_steps_trained: 401000
    num_steps_sampled: 401000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,401,10674.4,401000,-2.465,-1.98,-3.3,246.5


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 402000
  custom_metrics: {}
  date: 2021-11-05_15-30-22
  done: false
  episode_len_mean: 246.71
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.4670999999999914
  episode_reward_min: -3.2999999999999736
  episodes_this_iter: 4
  episodes_total: 1557
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5287996918355338e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.036670704682668
          entropy_coeff: 0.009999999999999998
          kl: 0.007294685863396132
          policy_loss: -0.03556567057967186
          total_loss: -0.033627609453267523
          vf_explained_var: 0.07098490744829178
          vf_loss: 0.012304766579634613
    num_agent_steps_sampled: 402000
    num_agent_steps_trained: 402000
    num_steps_sampled: 402000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,402,10699.8,402000,-2.4671,-1.98,-3.3,246.71




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 403000
  custom_metrics: {}
  date: 2021-11-05_15-31-06
  done: false
  episode_len_mean: 246.79
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.4678999999999918
  episode_reward_min: -3.2999999999999736
  episodes_this_iter: 5
  episodes_total: 1562
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5287996918355338e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.0821275956100889
          entropy_coeff: 0.009999999999999998
          kl: 0.012050911355951951
          policy_loss: -0.0362455304298136
          total_loss: -0.0315565577811665
          vf_explained_var: 0.1620030552148819
          vf_loss: 0.015510249469015334
    num_agent_steps_sampled: 403000
    num_agent_steps_trained: 403000
    num_steps_sampled: 403000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,403,10743.7,403000,-2.4679,-1.98,-3.3,246.79


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 404000
  custom_metrics: {}
  date: 2021-11-05_15-31-31
  done: false
  episode_len_mean: 247.07
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.470699999999991
  episode_reward_min: -3.2999999999999736
  episodes_this_iter: 4
  episodes_total: 1566
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5287996918355338e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.8776320391231113
          entropy_coeff: 0.009999999999999998
          kl: 0.005372927696717196
          policy_loss: 0.007350287752019034
          total_loss: 0.01004044082429674
          vf_explained_var: 0.13426148891448975
          vf_loss: 0.011466472533841927
    num_agent_steps_sampled: 404000
    num_agent_steps_trained: 404000
    num_steps_sampled: 404000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,404,10769.2,404000,-2.4707,-1.98,-3.3,247.07


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 405000
  custom_metrics: {}
  date: 2021-11-05_15-31-56
  done: false
  episode_len_mean: 247.88
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.478799999999991
  episode_reward_min: -3.2999999999999736
  episodes_this_iter: 4
  episodes_total: 1570
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5287996918355338e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.9540560417705112
          entropy_coeff: 0.009999999999999998
          kl: 0.010168528311380954
          policy_loss: 0.04124148802624809
          total_loss: 0.04421187125974231
          vf_explained_var: 0.07288772612810135
          vf_loss: 0.01251094602048397
    num_agent_steps_sampled: 405000
    num_agent_steps_trained: 405000
    num_steps_sampled: 405000
    num_steps_trained: 405000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,405,10794.6,405000,-2.4788,-2.07,-3.3,247.88


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 406000
  custom_metrics: {}
  date: 2021-11-05_15-32-22
  done: false
  episode_len_mean: 247.94
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.4793999999999907
  episode_reward_min: -3.2999999999999736
  episodes_this_iter: 4
  episodes_total: 1574
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5287996918355338e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.9616256475448608
          entropy_coeff: 0.009999999999999998
          kl: 0.008028311419715647
          policy_loss: 0.022703657299280165
          total_loss: 0.025610482030444675
          vf_explained_var: 0.11391689628362656
          vf_loss: 0.012523080729362037
    num_agent_steps_sampled: 406000
    num_agent_steps_trained: 406000
    num_steps_sampled: 406000
    num_steps_trained: 406000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,406,10820.4,406000,-2.4794,-2.07,-3.3,247.94


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 407000
  custom_metrics: {}
  date: 2021-11-05_15-32-48
  done: false
  episode_len_mean: 248.23
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.4822999999999906
  episode_reward_min: -3.2999999999999736
  episodes_this_iter: 4
  episodes_total: 1578
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5287996918355338e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.9189874821239048
          entropy_coeff: 0.009999999999999998
          kl: 0.003570709817368072
          policy_loss: -0.06163938186234898
          total_loss: -0.058604505078660114
          vf_explained_var: 0.15560610592365265
          vf_loss: 0.012224753387272358
    num_agent_steps_sampled: 407000
    num_agent_steps_trained: 407000
    num_steps_sampled: 407000
    num_steps_trained: 407000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,407,10846.3,407000,-2.4823,-2.07,-3.3,248.23


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 408000
  custom_metrics: {}
  date: 2021-11-05_15-33-14
  done: false
  episode_len_mean: 248.77
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.487699999999991
  episode_reward_min: -3.2999999999999736
  episodes_this_iter: 5
  episodes_total: 1583
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2643998459177669e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.8932886905140347
          entropy_coeff: 0.009999999999999998
          kl: 0.007456582684026121
          policy_loss: -0.019407027380334008
          total_loss: -0.012819006045659383
          vf_explained_var: 0.12968212366104126
          vf_loss: 0.01552091126019756
    num_agent_steps_sampled: 408000
    num_agent_steps_trained: 408000
    num_steps_sampled: 408000
    num_steps_trained: 408000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,408,10871.8,408000,-2.4877,-2.07,-3.3,248.77


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 409000
  custom_metrics: {}
  date: 2021-11-05_15-33-39
  done: false
  episode_len_mean: 248.33
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.483299999999991
  episode_reward_min: -3.2999999999999736
  episodes_this_iter: 4
  episodes_total: 1587
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2643998459177669e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.9096834394666884
          entropy_coeff: 0.009999999999999998
          kl: 0.00647388981107879
          policy_loss: 0.01587738386458821
          total_loss: 0.01760469377040863
          vf_explained_var: 0.12339456379413605
          vf_loss: 0.010824143120812046
    num_agent_steps_sampled: 409000
    num_agent_steps_trained: 409000
    num_steps_sampled: 409000
    num_steps_trained: 409000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,409,10897.3,409000,-2.4833,-2.07,-3.3,248.33




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 410000
  custom_metrics: {}
  date: 2021-11-05_15-34-23
  done: false
  episode_len_mean: 247.44
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.474399999999991
  episode_reward_min: -3.2999999999999736
  episodes_this_iter: 4
  episodes_total: 1591
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2643998459177669e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.9928777721193102
          entropy_coeff: 0.009999999999999998
          kl: 0.011818126145136729
          policy_loss: 0.03577723933590783
          total_loss: 0.037206912206278905
          vf_explained_var: 0.1528702974319458
          vf_loss: 0.011358450094444885
    num_agent_steps_sampled: 410000
    num_agent_steps_trained: 410000
    num_steps_sampled: 410000
    num_steps_trained: 410000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,410,10940.8,410000,-2.4744,-2.06,-3.3,247.44


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 411000
  custom_metrics: {}
  date: 2021-11-05_15-34-49
  done: false
  episode_len_mean: 246.61
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.4660999999999915
  episode_reward_min: -3.2999999999999736
  episodes_this_iter: 4
  episodes_total: 1595
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2643998459177669e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.800333391295539
          entropy_coeff: 0.009999999999999998
          kl: 0.008859568364325474
          policy_loss: 0.02672650987903277
          total_loss: 0.02979230470955372
          vf_explained_var: 0.13397864997386932
          vf_loss: 0.011069125361326667
    num_agent_steps_sampled: 411000
    num_agent_steps_trained: 411000
    num_steps_sampled: 411000
    num_steps_trained: 411000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,411,10967.2,411000,-2.4661,-2.06,-3.3,246.61


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 412000
  custom_metrics: {}
  date: 2021-11-05_15-35-15
  done: false
  episode_len_mean: 244.92
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.449199999999992
  episode_reward_min: -2.8899999999999824
  episodes_this_iter: 4
  episodes_total: 1599
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2643998459177669e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.7985426478915745
          entropy_coeff: 0.009999999999999998
          kl: 0.007934901540525235
          policy_loss: -0.022811822013722526
          total_loss: -0.019538318365812303
          vf_explained_var: 0.29609501361846924
          vf_loss: 0.011258930909550852
    num_agent_steps_sampled: 412000
    num_agent_steps_trained: 412000
    num_steps_sampled: 412000
    num_steps_trained: 412000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,412,10993,412000,-2.4492,-2.06,-2.89,244.92


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 413000
  custom_metrics: {}
  date: 2021-11-05_15-35-41
  done: false
  episode_len_mean: 243.25
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.4324999999999917
  episode_reward_min: -2.819999999999984
  episodes_this_iter: 5
  episodes_total: 1604
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2643998459177669e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.642191203435262
          entropy_coeff: 0.009999999999999998
          kl: 0.004686935099127254
          policy_loss: -0.023711100551817153
          total_loss: -0.015029353400071463
          vf_explained_var: 0.16232436895370483
          vf_loss: 0.015103660265190734
    num_agent_steps_sampled: 413000
    num_agent_steps_trained: 413000
    num_steps_sampled: 413000
    num_steps_trained: 413000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,413,11018.7,413000,-2.4325,-2.06,-2.82,243.25


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 414000
  custom_metrics: {}
  date: 2021-11-05_15-36-07
  done: false
  episode_len_mean: 241.67
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.4166999999999925
  episode_reward_min: -2.7799999999999847
  episodes_this_iter: 4
  episodes_total: 1608
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.3219992295888346e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.69635937611262
          entropy_coeff: 0.009999999999999998
          kl: 0.004651536706864571
          policy_loss: 0.026883410745196873
          total_loss: 0.03150503788557318
          vf_explained_var: 0.1270812600851059
          vf_loss: 0.011585221180899276
    num_agent_steps_sampled: 414000
    num_agent_steps_trained: 414000
    num_steps_sampled: 414000
    num_steps_trained: 414000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,414,11045,414000,-2.4167,-2.06,-2.78,241.67


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 415000
  custom_metrics: {}
  date: 2021-11-05_15-36-33
  done: false
  episode_len_mean: 240.3
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.402999999999993
  episode_reward_min: -2.709999999999986
  episodes_this_iter: 4
  episodes_total: 1612
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1609996147944173e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.6479782554838392
          entropy_coeff: 0.009999999999999998
          kl: 0.00847232963108843
          policy_loss: -0.023137358286314542
          total_loss: -0.017065848575698005
          vf_explained_var: 0.10814766585826874
          vf_loss: 0.012551294494834211
    num_agent_steps_sampled: 415000
    num_agent_steps_trained: 415000
    num_steps_sampled: 415000
    num_steps_trained: 415000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,415,11071.1,415000,-2.403,-2.06,-2.71,240.3


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 416000
  custom_metrics: {}
  date: 2021-11-05_15-36-59
  done: false
  episode_len_mean: 239.31
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.393099999999993
  episode_reward_min: -2.709999999999986
  episodes_this_iter: 5
  episodes_total: 1617
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1609996147944173e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.5619924353228675
          entropy_coeff: 0.009999999999999998
          kl: 0.004426029410836287
          policy_loss: -0.034959306981828475
          total_loss: -0.024711662903428078
          vf_explained_var: 0.0985604003071785
          vf_loss: 0.01586756928720408
    num_agent_steps_sampled: 416000
    num_agent_steps_trained: 416000
    num_steps_sampled: 416000
    num_steps_trained: 416000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,416,11097.1,416000,-2.3931,-2.06,-2.71,239.31




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 417000
  custom_metrics: {}
  date: 2021-11-05_15-37-44
  done: false
  episode_len_mean: 238.43
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.384299999999993
  episode_reward_min: -2.709999999999986
  episodes_this_iter: 4
  episodes_total: 1621
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5804998073972086e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.5857033292452495
          entropy_coeff: 0.009999999999999998
          kl: 0.008566803503687402
          policy_loss: 0.018208368743459383
          total_loss: 0.024536847033434443
          vf_explained_var: 0.10450699925422668
          vf_loss: 0.012185509968549012
    num_agent_steps_sampled: 417000
    num_agent_steps_trained: 417000
    num_steps_sampled: 417000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,417,11141.7,417000,-2.3843,-2.03,-2.71,238.43


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 418000
  custom_metrics: {}
  date: 2021-11-05_15-38-10
  done: false
  episode_len_mean: 238.05
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.3804999999999934
  episode_reward_min: -2.709999999999986
  episodes_this_iter: 4
  episodes_total: 1625
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5804998073972086e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.6503052287631564
          entropy_coeff: 0.009999999999999998
          kl: 0.005930188715167459
          policy_loss: -0.01634103837940428
          total_loss: -0.009466336046655973
          vf_explained_var: 0.05784133821725845
          vf_loss: 0.013377754183279144
    num_agent_steps_sampled: 418000
    num_agent_steps_trained: 418000
    num_steps_sampled: 418000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,418,11168,418000,-2.3805,-2.03,-2.71,238.05


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 419000
  custom_metrics: {}
  date: 2021-11-05_15-38-36
  done: false
  episode_len_mean: 238.06
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.3805999999999927
  episode_reward_min: -2.709999999999986
  episodes_this_iter: 5
  episodes_total: 1630
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5804998073972086e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.5845402605003781
          entropy_coeff: 0.009999999999999998
          kl: 0.002890810099927658
          policy_loss: -0.021933324221107695
          total_loss: -0.012914755526516173
          vf_explained_var: 0.18858367204666138
          vf_loss: 0.014863968474997415
    num_agent_steps_sampled: 419000
    num_agent_steps_trained: 419000
    num_steps_sampled: 419000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,419,11193.9,419000,-2.3806,-2.03,-2.71,238.06


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 420000
  custom_metrics: {}
  date: 2021-11-05_15-39-02
  done: false
  episode_len_mean: 237.58
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.3757999999999933
  episode_reward_min: -2.709999999999986
  episodes_this_iter: 4
  episodes_total: 1634
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.902499036986043e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.6521967662705316
          entropy_coeff: 0.009999999999999998
          kl: 0.003212952303921352
          policy_loss: 0.004015462183290058
          total_loss: 0.010009567273987665
          vf_explained_var: 0.08542279899120331
          vf_loss: 0.012516072754644685
    num_agent_steps_sampled: 420000
    num_agent_steps_trained: 420000
    num_steps_sampled: 420000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,420,11220,420000,-2.3758,-2.03,-2.71,237.58


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 421000
  custom_metrics: {}
  date: 2021-11-05_15-39-28
  done: false
  episode_len_mean: 237.17
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.3716999999999935
  episode_reward_min: -2.709999999999986
  episodes_this_iter: 4
  episodes_total: 1638
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.9512495184930216e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.6227018217245738
          entropy_coeff: 0.009999999999999998
          kl: 0.0034497349125684213
          policy_loss: 0.029530308312839933
          total_loss: 0.03423194686571757
          vf_explained_var: 0.1465514451265335
          vf_loss: 0.010928654184357987
    num_agent_steps_sampled: 421000
    num_agent_steps_trained: 421000
    num_steps_sampled: 421000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,421,11246.1,421000,-2.3717,-2.03,-2.71,237.17


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 422000
  custom_metrics: {}
  date: 2021-11-05_15-39-55
  done: false
  episode_len_mean: 236.8
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.367999999999993
  episode_reward_min: -2.709999999999986
  episodes_this_iter: 4
  episodes_total: 1642
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9756247592465108e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.522911567820443
          entropy_coeff: 0.009999999999999998
          kl: 0.019557358445957046
          policy_loss: -0.06064496512214343
          total_loss: -0.05379681214690209
          vf_explained_var: 0.08309946954250336
          vf_loss: 0.012077268461386364
    num_agent_steps_sampled: 422000
    num_agent_steps_trained: 422000
    num_steps_sampled: 422000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,422,11272.4,422000,-2.368,-2.03,-2.71,236.8


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 423000
  custom_metrics: {}
  date: 2021-11-05_15-40-21
  done: false
  episode_len_mean: 235.72
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.3571999999999935
  episode_reward_min: -2.709999999999986
  episodes_this_iter: 5
  episodes_total: 1647
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9756247592465108e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.5423240578836865
          entropy_coeff: 0.009999999999999998
          kl: 0.003931528527061761
          policy_loss: 0.007676494949393802
          total_loss: 0.016243368801143434
          vf_explained_var: 0.1415799856185913
          vf_loss: 0.013990115550243192
    num_agent_steps_sampled: 423000
    num_agent_steps_trained: 423000
    num_steps_sampled: 423000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,423,11298.8,423000,-2.3572,-2.03,-2.71,235.72




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 424000
  custom_metrics: {}
  date: 2021-11-05_15-41-04
  done: false
  episode_len_mean: 234.82
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.348199999999994
  episode_reward_min: -2.709999999999986
  episodes_this_iter: 4
  episodes_total: 1651
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.878123796232554e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.7711785740322536
          entropy_coeff: 0.009999999999999998
          kl: 0.011523138801753024
          policy_loss: 0.02118894077009625
          total_loss: 0.023310944189627966
          vf_explained_var: 0.19315221905708313
          vf_loss: 0.00983378469116158
    num_agent_steps_sampled: 424000
    num_agent_steps_trained: 424000
    num_steps_sampled: 424000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,424,11341.8,424000,-2.3482,-2.03,-2.71,234.82


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 425000
  custom_metrics: {}
  date: 2021-11-05_15-41-30
  done: false
  episode_len_mean: 234.27
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.3426999999999936
  episode_reward_min: -2.709999999999986
  episodes_this_iter: 5
  episodes_total: 1656
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.878123796232554e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.6236748390727573
          entropy_coeff: 0.009999999999999998
          kl: 0.007002020691337426
          policy_loss: -0.02140576384133763
          total_loss: -0.011204165054692163
          vf_explained_var: 0.0490732342004776
          vf_loss: 0.016438343406965334
    num_agent_steps_sampled: 425000
    num_agent_steps_trained: 425000
    num_steps_sampled: 425000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,425,11367.8,425000,-2.3427,-2.03,-2.71,234.27


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 426000
  custom_metrics: {}
  date: 2021-11-05_15-41-55
  done: false
  episode_len_mean: 234.56
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.345599999999994
  episode_reward_min: -2.709999999999986
  episodes_this_iter: 4
  episodes_total: 1660
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.878123796232554e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.8714503698878818
          entropy_coeff: 0.009999999999999998
          kl: 0.007116852684038003
          policy_loss: 0.02231485711203681
          total_loss: 0.025311458110809325
          vf_explained_var: 0.2063472718000412
          vf_loss: 0.011711108156790336
    num_agent_steps_sampled: 426000
    num_agent_steps_trained: 426000
    num_steps_sampled: 426000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,426,11392.9,426000,-2.3456,-2.03,-2.71,234.56


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 427000
  custom_metrics: {}
  date: 2021-11-05_15-42-22
  done: false
  episode_len_mean: 234.09
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.3408999999999938
  episode_reward_min: -2.709999999999986
  episodes_this_iter: 4
  episodes_total: 1664
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.878123796232554e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.8422571930620405
          entropy_coeff: 0.009999999999999998
          kl: 0.06460291329359365
          policy_loss: 0.03028439059853554
          total_loss: 0.03313567464550336
          vf_explained_var: 0.22068800032138824
          vf_loss: 0.01127385338768363
    num_agent_steps_sampled: 427000
    num_agent_steps_trained: 427000
    num_steps_sampled: 427000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,427,11419.3,427000,-2.3409,-2.03,-2.71,234.09


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 428000
  custom_metrics: {}
  date: 2021-11-05_15-42-48
  done: false
  episode_len_mean: 233.7
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.3369999999999944
  episode_reward_min: -2.709999999999986
  episodes_this_iter: 4
  episodes_total: 1668
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.481718569434883e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.5702493621243371
          entropy_coeff: 0.009999999999999998
          kl: 0.029716496990946775
          policy_loss: -0.07895462678538429
          total_loss: -0.07516166302892897
          vf_explained_var: 0.2542342245578766
          vf_loss: 0.00949545641326242
    num_agent_steps_sampled: 428000
    num_agent_steps_trained: 428000
    num_steps_sampled: 428000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,428,11445.9,428000,-2.337,-2.03,-2.71,233.7


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 429000
  custom_metrics: {}
  date: 2021-11-05_15-43-14
  done: false
  episode_len_mean: 233.34
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.333399999999994
  episode_reward_min: -2.709999999999986
  episodes_this_iter: 5
  episodes_total: 1673
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2225778541523246e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.62306250764264
          entropy_coeff: 0.009999999999999998
          kl: 0.014195352643189255
          policy_loss: -0.011771767420901193
          total_loss: -0.007042350620031357
          vf_explained_var: 0.3293197751045227
          vf_loss: 0.010960039217025042
    num_agent_steps_sampled: 429000
    num_agent_steps_trained: 429000
    num_steps_sampled: 429000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,429,11471.8,429000,-2.3334,-2.03,-2.71,233.34


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 430000
  custom_metrics: {}
  date: 2021-11-05_15-43-41
  done: false
  episode_len_mean: 233.02
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.330199999999994
  episode_reward_min: -2.709999999999986
  episodes_this_iter: 4
  episodes_total: 1677
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2225778541523246e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.7252592364947001
          entropy_coeff: 0.009999999999999998
          kl: 0.07508239343134258
          policy_loss: 0.04248196892440319
          total_loss: 0.04450431623392635
          vf_explained_var: 0.23963545262813568
          vf_loss: 0.009274936141446232
    num_agent_steps_sampled: 430000
    num_agent_steps_trained: 430000
    num_steps_sampled: 430000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,430,11498.4,430000,-2.3302,-2.03,-2.71,233.02




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 431000
  custom_metrics: {}
  date: 2021-11-05_15-44-25
  done: false
  episode_len_mean: 232.76
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.327599999999994
  episode_reward_min: -2.709999999999986
  episodes_this_iter: 4
  episodes_total: 1681
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3338667812284866e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.6132401585578918
          entropy_coeff: 0.009999999999999998
          kl: 0.0045574523247065065
          policy_loss: -0.018577946060233646
          total_loss: -0.014763783829079734
          vf_explained_var: 0.2775662839412689
          vf_loss: 0.00994656543350882
    num_agent_steps_sampled: 431000
    num_agent_steps_trained: 431000
    num_steps_sampled: 431000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,431,11541.9,431000,-2.3276,-2.03,-2.71,232.76


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 432000
  custom_metrics: {}
  date: 2021-11-05_15-44-50
  done: false
  episode_len_mean: 232.46
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.3245999999999944
  episode_reward_min: -2.709999999999986
  episodes_this_iter: 5
  episodes_total: 1686
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6669333906142433e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.5083500607146157
          entropy_coeff: 0.009999999999999998
          kl: 0.0042870908411968695
          policy_loss: -0.007244126001993815
          total_loss: 0.0004343830876880222
          vf_explained_var: 0.32464075088500977
          vf_loss: 0.012762007748501168
    num_agent_steps_sampled: 432000
    num_agent_steps_trained: 432000
    num_steps_sampled: 432000
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,432,11567.7,432000,-2.3246,-2.03,-2.71,232.46


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 433000
  custom_metrics: {}
  date: 2021-11-05_15-45-17
  done: false
  episode_len_mean: 232.41
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.3240999999999947
  episode_reward_min: -2.709999999999986
  episodes_this_iter: 4
  episodes_total: 1690
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.334666953071217e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.6426402986049652
          entropy_coeff: 0.009999999999999998
          kl: 0.017972555101204712
          policy_loss: 0.04306894267598788
          total_loss: 0.04631177162130674
          vf_explained_var: 0.4107741415500641
          vf_loss: 0.009669231747587522
    num_agent_steps_sampled: 433000
    num_agent_steps_trained: 433000
    num_steps_sampled: 433000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,433,11594,433000,-2.3241,-2.03,-2.71,232.41


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 434000
  custom_metrics: {}
  date: 2021-11-05_15-45-43
  done: false
  episode_len_mean: 232.01
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.3200999999999947
  episode_reward_min: -2.4499999999999917
  episodes_this_iter: 4
  episodes_total: 1694
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.334666953071217e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.9161826113859812
          entropy_coeff: 0.009999999999999998
          kl: 0.046632924674032
          policy_loss: -0.009349249344733026
          total_loss: -0.009700706725319226
          vf_explained_var: 0.5357998013496399
          vf_loss: 0.008810370244706671
    num_agent_steps_sampled: 434000
    num_agent_steps_trained: 434000
    num_steps_sampled: 434000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,434,11620,434000,-2.3201,-2.03,-2.45,232.01


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 435000
  custom_metrics: {}
  date: 2021-11-05_15-46-07
  done: false
  episode_len_mean: 232.51
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.3250999999999946
  episode_reward_min: -2.4999999999999907
  episodes_this_iter: 4
  episodes_total: 1698
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2502000429606824e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.8761776195632087
          entropy_coeff: 0.009999999999999998
          kl: 0.01694327653629492
          policy_loss: 0.017755666623512904
          total_loss: 0.017583851764599482
          vf_explained_var: 0.45258665084838867
          vf_loss: 0.008589962549093697
    num_agent_steps_sampled: 435000
    num_agent_steps_trained: 435000
    num_steps_sampled: 435000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,435,11644.6,435000,-2.3251,-2.03,-2.5,232.51


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 436000
  custom_metrics: {}
  date: 2021-11-05_15-46-31
  done: false
  episode_len_mean: 233.44
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.334399999999994
  episode_reward_min: -2.7799999999999847
  episodes_this_iter: 4
  episodes_total: 1702
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2502000429606824e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.0679854730765024
          entropy_coeff: 0.009999999999999998
          kl: 0.05924027728939603
          policy_loss: 0.045628506110774146
          total_loss: 0.04602626553840107
          vf_explained_var: 0.34667208790779114
          vf_loss: 0.011077614459726546
    num_agent_steps_sampled: 436000
    num_agent_steps_trained: 436000
    num_steps_sampled: 436000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,436,11668.3,436000,-2.3344,-2.03,-2.78,233.44


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 437000
  custom_metrics: {}
  date: 2021-11-05_15-46-53
  done: false
  episode_len_mean: 235.15
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.351499999999994
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 4
  episodes_total: 1706
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8753000644410237e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.2169653839535184
          entropy_coeff: 0.009999999999999998
          kl: 0.01600192714793182
          policy_loss: 0.03589156683948305
          total_loss: 0.03172600749466154
          vf_explained_var: 0.1661822497844696
          vf_loss: 0.008004091281650794
    num_agent_steps_sampled: 437000
    num_agent_steps_trained: 437000
    num_steps_sampled: 437000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,437,11690.4,437000,-2.3515,-2.03,-3.04,235.15


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 438000
  custom_metrics: {}
  date: 2021-11-05_15-47-16
  done: false
  episode_len_mean: 236.7
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.366999999999993
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 3
  episodes_total: 1709
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8753000644410237e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.319149731265174
          entropy_coeff: 0.009999999999999998
          kl: 0.017210456700807304
          policy_loss: 0.0066019017663266925
          total_loss: 0.00027097355988290575
          vf_explained_var: 0.2125529795885086
          vf_loss: 0.006860569887794554
    num_agent_steps_sampled: 438000
    num_agent_steps_trained: 438000
    num_steps_sampled: 438000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,438,11712.9,438000,-2.367,-2.03,-3.04,236.7




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 439000
  custom_metrics: {}
  date: 2021-11-05_15-47-55
  done: false
  episode_len_mean: 238.82
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.3881999999999928
  episode_reward_min: -3.2099999999999755
  episodes_this_iter: 4
  episodes_total: 1713
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8753000644410237e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.1704005069202847
          entropy_coeff: 0.009999999999999998
          kl: 0.019753866195487055
          policy_loss: 0.028476892039179802
          total_loss: 0.02723069302737713
          vf_explained_var: 0.08416099846363068
          vf_loss: 0.010457805978755156
    num_agent_steps_sampled: 439000
    num_agent_steps_trained: 439000
    num_steps_sampled: 439000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,439,11751.7,439000,-2.3882,-2.03,-3.21,238.82


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 440000
  custom_metrics: {}
  date: 2021-11-05_15-48-16
  done: false
  episode_len_mean: 240.63
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.406299999999993
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 3
  episodes_total: 1716
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8753000644410237e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.2202312270800273
          entropy_coeff: 0.009999999999999998
          kl: 0.03211905739625788
          policy_loss: 0.029653623700141907
          total_loss: 0.023633886956506304
          vf_explained_var: -0.17074409127235413
          vf_loss: 0.006182572761705766
    num_agent_steps_sampled: 440000
    num_agent_steps_trained: 440000
    num_steps_sampled: 440000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,440,11773.4,440000,-2.4063,-2.03,-3.32,240.63


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 441000
  custom_metrics: {}
  date: 2021-11-05_15-48-37
  done: false
  episode_len_mean: 242.64
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.426399999999992
  episode_reward_min: -3.4399999999999706
  episodes_this_iter: 3
  episodes_total: 1719
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8129500966615354e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.1451591796345182
          entropy_coeff: 0.009999999999999998
          kl: 0.025618910429909804
          policy_loss: -0.09012624472379685
          total_loss: -0.0896906310485469
          vf_explained_var: 0.20921114087104797
          vf_loss: 0.011887205661171012
    num_agent_steps_sampled: 441000
    num_agent_steps_trained: 441000
    num_steps_sampled: 441000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,441,11793.7,441000,-2.4264,-2.03,-3.44,242.64


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 442000
  custom_metrics: {}
  date: 2021-11-05_15-48-57
  done: false
  episode_len_mean: 245.9
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.4589999999999916
  episode_reward_min: -3.6299999999999666
  episodes_this_iter: 3
  episodes_total: 1722
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.219425144992305e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.1386814329359267
          entropy_coeff: 0.009999999999999998
          kl: 0.036758969113355515
          policy_loss: -0.09793111272156238
          total_loss: -0.09596618803011046
          vf_explained_var: 0.19834402203559875
          vf_loss: 0.013351737262888087
    num_agent_steps_sampled: 442000
    num_agent_steps_trained: 442000
    num_steps_sampled: 442000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,442,11813.7,442000,-2.459,-2.03,-3.63,245.9


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 443000
  custom_metrics: {}
  date: 2021-11-05_15-49-19
  done: false
  episode_len_mean: 248.13
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.4812999999999907
  episode_reward_min: -3.6299999999999666
  episodes_this_iter: 4
  episodes_total: 1726
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.329137717488457e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.0625565813647375
          entropy_coeff: 0.009999999999999998
          kl: 0.009943926103961647
          policy_loss: 0.010979436172379387
          total_loss: 0.012119158274597593
          vf_explained_var: 0.2568429112434387
          vf_loss: 0.011765289430816969
    num_agent_steps_sampled: 443000
    num_agent_steps_trained: 443000
    num_steps_sampled: 443000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,443,11836,443000,-2.4813,-2.03,-3.63,248.13


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 444000
  custom_metrics: {}
  date: 2021-11-05_15-49-42
  done: false
  episode_len_mean: 249.67
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.496699999999991
  episode_reward_min: -3.6299999999999666
  episodes_this_iter: 4
  episodes_total: 1730
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.329137717488457e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.1161949071619246
          entropy_coeff: 0.009999999999999998
          kl: 0.00901388198818671
          policy_loss: 0.008332150429487229
          total_loss: 0.008930718526244164
          vf_explained_var: 0.29274797439575195
          vf_loss: 0.011760514695197344
    num_agent_steps_sampled: 444000
    num_agent_steps_trained: 444000
    num_steps_sampled: 444000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,444,11858.9,444000,-2.4967,-2.03,-3.63,249.67


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 445000
  custom_metrics: {}
  date: 2021-11-05_15-50-05
  done: false
  episode_len_mean: 250.78
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.5077999999999907
  episode_reward_min: -3.6299999999999666
  episodes_this_iter: 3
  episodes_total: 1733
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.329137717488457e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.0429308421081966
          entropy_coeff: 0.009999999999999998
          kl: 0.02328146188837867
          policy_loss: -0.09985292462839021
          total_loss: -0.09815418993433317
          vf_explained_var: 0.3610258400440216
          vf_loss: 0.012128040246251557
    num_agent_steps_sampled: 445000
    num_agent_steps_trained: 445000
    num_steps_sampled: 445000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,445,11882.5,445000,-2.5078,-2.03,-3.63,250.78


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 446000
  custom_metrics: {}
  date: 2021-11-05_15-50-29
  done: false
  episode_len_mean: 251.97
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.5196999999999896
  episode_reward_min: -3.6299999999999666
  episodes_this_iter: 4
  episodes_total: 1737
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.49370657623268e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.0282638523313734
          entropy_coeff: 0.009999999999999998
          kl: 0.021161462858965762
          policy_loss: -0.028449211517969766
          total_loss: -0.026527877731455696
          vf_explained_var: 0.3912953734397888
          vf_loss: 0.012203976729263861
    num_agent_steps_sampled: 446000
    num_agent_steps_trained: 446000
    num_steps_sampled: 446000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,446,11906.1,446000,-2.5197,-2.03,-3.63,251.97




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 447000
  custom_metrics: {}
  date: 2021-11-05_15-51-09
  done: false
  episode_len_mean: 253.73
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.53729999999999
  episode_reward_min: -3.6299999999999666
  episodes_this_iter: 4
  episodes_total: 1741
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4240559864349027e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.9552160435252719
          entropy_coeff: 0.009999999999999998
          kl: 0.018204977770827607
          policy_loss: 0.044776478451159264
          total_loss: 0.04877775559822718
          vf_explained_var: 0.41715389490127563
          vf_loss: 0.013553435044984023
    num_agent_steps_sampled: 447000
    num_agent_steps_trained: 447000
    num_steps_sampled: 447000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,447,11946.2,447000,-2.5373,-2.03,-3.63,253.73


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 448000
  custom_metrics: {}
  date: 2021-11-05_15-51-32
  done: false
  episode_len_mean: 255.4
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.553999999999989
  episode_reward_min: -3.6299999999999666
  episodes_this_iter: 3
  episodes_total: 1744
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4240559864349027e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.9698927700519562
          entropy_coeff: 0.009999999999999998
          kl: 0.02307235768321997
          policy_loss: -0.03347999726732572
          total_loss: -0.032216022411982216
          vf_explained_var: 0.14148971438407898
          vf_loss: 0.010962905331204335
    num_agent_steps_sampled: 448000
    num_agent_steps_trained: 448000
    num_steps_sampled: 448000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,448,11969,448000,-2.554,-2.03,-3.63,255.4


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 449000
  custom_metrics: {}
  date: 2021-11-05_15-51-55
  done: false
  episode_len_mean: 256.57
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.5656999999999885
  episode_reward_min: -3.6299999999999666
  episodes_this_iter: 4
  episodes_total: 1748
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1360839796523544e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.7757232407728831
          entropy_coeff: 0.009999999999999998
          kl: 0.050061060701077846
          policy_loss: -0.04486515720685323
          total_loss: -0.03988698157999251
          vf_explained_var: 0.1913001388311386
          vf_loss: 0.012735408679064777
    num_agent_steps_sampled: 449000
    num_agent_steps_trained: 449000
    num_steps_sampled: 449000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,449,11991.9,449000,-2.5657,-2.03,-3.63,256.57


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 450000
  custom_metrics: {}
  date: 2021-11-05_15-52-19
  done: false
  episode_len_mean: 258.26
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.5825999999999887
  episode_reward_min: -3.6299999999999666
  episodes_this_iter: 4
  episodes_total: 1752
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.2041259694785308e-15
          cur_lr: 5.000000000000001e-05
          entropy: 1.0179775092336867
          entropy_coeff: 0.009999999999999998
          kl: 0.01734533343091095
          policy_loss: 0.02447036173608568
          total_loss: 0.026710972107119032
          vf_explained_var: 0.28449898958206177
          vf_loss: 0.012420386003537311
    num_agent_steps_sampled: 450000
    num_agent_steps_trained: 450000
    num_steps_sampled: 450000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,450,12015.6,450000,-2.5826,-2.14,-3.63,258.26


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 451000
  custom_metrics: {}
  date: 2021-11-05_15-52-42
  done: false
  episode_len_mean: 259.77
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.5976999999999886
  episode_reward_min: -3.6299999999999666
  episodes_this_iter: 3
  episodes_total: 1755
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.2041259694785308e-15
          cur_lr: 5.000000000000001e-05
          entropy: 1.2923148439990149
          entropy_coeff: 0.009999999999999998
          kl: 0.05299040254206788
          policy_loss: -0.11420892495661975
          total_loss: -0.11223246227535937
          vf_explained_var: 0.14149610698223114
          vf_loss: 0.014899611483431524
    num_agent_steps_sampled: 451000
    num_agent_steps_trained: 451000
    num_steps_sampled: 451000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,451,12038.5,451000,-2.5977,-2.14,-3.63,259.77


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 452000
  custom_metrics: {}
  date: 2021-11-05_15-53-02
  done: false
  episode_len_mean: 262.26
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.622599999999988
  episode_reward_min: -3.6299999999999666
  episodes_this_iter: 4
  episodes_total: 1759
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.806188954217794e-15
          cur_lr: 5.000000000000001e-05
          entropy: 1.200148966577318
          entropy_coeff: 0.009999999999999998
          kl: 0.024830239146764577
          policy_loss: 0.018841383357842762
          total_loss: 0.020724495748678844
          vf_explained_var: 0.14899469912052155
          vf_loss: 0.01388460165924496
    num_agent_steps_sampled: 452000
    num_agent_steps_trained: 452000
    num_steps_sampled: 452000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,452,12059.2,452000,-2.6226,-2.14,-3.63,262.26


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 453000
  custom_metrics: {}
  date: 2021-11-05_15-53-24
  done: false
  episode_len_mean: 263.86
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.638599999999988
  episode_reward_min: -3.6299999999999666
  episodes_this_iter: 3
  episodes_total: 1762
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.209283431326692e-15
          cur_lr: 5.000000000000001e-05
          entropy: 1.0730452491177453
          entropy_coeff: 0.009999999999999998
          kl: 0.03167215087516379
          policy_loss: -0.03283924874332216
          total_loss: -0.032622199257214865
          vf_explained_var: 0.027504464611411095
          vf_loss: 0.010947501843070818
    num_agent_steps_sampled: 453000
    num_agent_steps_trained: 453000
    num_steps_sampled: 453000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,453,12081,453000,-2.6386,-2.14,-3.63,263.86


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 454000
  custom_metrics: {}
  date: 2021-11-05_15-53-46
  done: false
  episode_len_mean: 266.31
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.663099999999987
  episode_reward_min: -3.6299999999999666
  episodes_this_iter: 4
  episodes_total: 1766
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0813925146990042e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.0095351768864527
          entropy_coeff: 0.009999999999999998
          kl: 0.00810186467888043
          policy_loss: 0.009450487875276142
          total_loss: 0.013673250211609735
          vf_explained_var: 0.08120651543140411
          vf_loss: 0.014318114187982347
    num_agent_steps_sampled: 454000
    num_agent_steps_trained: 454000
    num_steps_sampled: 454000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,454,12102.6,454000,-2.6631,-2.14,-3.63,266.31


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 455000
  custom_metrics: {}
  date: 2021-11-05_15-54-08
  done: false
  episode_len_mean: 268.27
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.6826999999999863
  episode_reward_min: -3.6299999999999666
  episodes_this_iter: 3
  episodes_total: 1769
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0813925146990042e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.1077241950564913
          entropy_coeff: 0.009999999999999998
          kl: 0.00897505807841436
          policy_loss: 0.05274945083591673
          total_loss: 0.05186864154206382
          vf_explained_var: 0.21545250713825226
          vf_loss: 0.010196435415289468
    num_agent_steps_sampled: 455000
    num_agent_steps_trained: 455000
    num_steps_sampled: 455000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,455,12124.3,455000,-2.6827,-2.14,-3.63,268.27




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 456000
  custom_metrics: {}
  date: 2021-11-05_15-54-48
  done: false
  episode_len_mean: 269.96
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.699599999999987
  episode_reward_min: -3.6299999999999666
  episodes_this_iter: 3
  episodes_total: 1772
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0813925146990042e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.1787651379903157
          entropy_coeff: 0.009999999999999998
          kl: 0.04241265633095901
          policy_loss: -0.0886781027747525
          total_loss: -0.08780221417546272
          vf_explained_var: 0.2882891893386841
          vf_loss: 0.012663541537606054
    num_agent_steps_sampled: 456000
    num_agent_steps_trained: 456000
    num_steps_sampled: 456000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,456,12164.5,456000,-2.6996,-2.14,-3.63,269.96


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 457000
  custom_metrics: {}
  date: 2021-11-05_15-55-08
  done: false
  episode_len_mean: 273.31
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.7330999999999857
  episode_reward_min: -3.6299999999999666
  episodes_this_iter: 4
  episodes_total: 1776
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6220887720485055e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.1793490568796794
          entropy_coeff: 0.009999999999999998
          kl: 0.014434653370619098
          policy_loss: -0.0067586221214797765
          total_loss: -0.003810272779729631
          vf_explained_var: 0.00654440326616168
          vf_loss: 0.014741841972702079
    num_agent_steps_sampled: 457000
    num_agent_steps_trained: 457000
    num_steps_sampled: 457000
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,457,12184.3,457000,-2.7331,-2.14,-3.63,273.31


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 458000
  custom_metrics: {}
  date: 2021-11-05_15-55-27
  done: false
  episode_len_mean: 276.51
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.7650999999999852
  episode_reward_min: -3.6299999999999666
  episodes_this_iter: 3
  episodes_total: 1779
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6220887720485055e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.258794579241011
          entropy_coeff: 0.009999999999999998
          kl: 0.01145136187803026
          policy_loss: 0.028060013055801393
          total_loss: 0.027405086987548403
          vf_explained_var: -0.23191282153129578
          vf_loss: 0.011933017436725398
    num_agent_steps_sampled: 458000
    num_agent_steps_trained: 458000
    num_steps_sampled: 458000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,458,12203.4,458000,-2.7651,-2.14,-3.63,276.51


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 459000
  custom_metrics: {}
  date: 2021-11-05_15-55-46
  done: false
  episode_len_mean: 279.5
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.794999999999985
  episode_reward_min: -3.6299999999999666
  episodes_this_iter: 3
  episodes_total: 1782
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6220887720485055e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.2198145177629258
          entropy_coeff: 0.009999999999999998
          kl: 0.015919234855798377
          policy_loss: 0.04363316521048546
          total_loss: 0.042624567614661324
          vf_explained_var: -0.22669722139835358
          vf_loss: 0.0111895483181191
    num_agent_steps_sampled: 459000
    num_agent_steps_trained: 459000
    num_steps_sampled: 459000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,459,12222.9,459000,-2.795,-2.22,-3.63,279.5


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 460000
  custom_metrics: {}
  date: 2021-11-05_15-56-05
  done: false
  episode_len_mean: 281.88
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.818799999999984
  episode_reward_min: -3.659999999999966
  episodes_this_iter: 2
  episodes_total: 1784
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6220887720485055e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.2756656699710422
          entropy_coeff: 0.009999999999999998
          kl: 0.02077755211933214
          policy_loss: -0.0905645016166899
          total_loss: -0.09231385356850094
          vf_explained_var: -0.1174687072634697
          vf_loss: 0.011007304810401465
    num_agent_steps_sampled: 460000
    num_agent_steps_trained: 460000
    num_steps_sampled: 460000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,460,12241.5,460000,-2.8188,-2.22,-3.66,281.88


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 461000
  custom_metrics: {}
  date: 2021-11-05_15-56-24
  done: false
  episode_len_mean: 285.18
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.851799999999983
  episode_reward_min: -3.659999999999966
  episodes_this_iter: 3
  episodes_total: 1787
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4331331580727588e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.2443876637352838
          entropy_coeff: 0.009999999999999998
          kl: 0.012026960915037459
          policy_loss: -0.11073677920632892
          total_loss: -0.10778093478745884
          vf_explained_var: 0.10730095207691193
          vf_loss: 0.015399719153841336
    num_agent_steps_sampled: 461000
    num_agent_steps_trained: 461000
    num_steps_sampled: 461000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,461,12260.8,461000,-2.8518,-2.22,-3.66,285.18


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 462000
  custom_metrics: {}
  date: 2021-11-05_15-56-43
  done: false
  episode_len_mean: 288.56
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.885599999999983
  episode_reward_min: -3.659999999999966
  episodes_this_iter: 3
  episodes_total: 1790
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4331331580727588e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.2444969958729215
          entropy_coeff: 0.009999999999999998
          kl: 0.02612491802405188
          policy_loss: -0.11344270457824071
          total_loss: -0.1103084411058161
          vf_explained_var: 0.1802043914794922
          vf_loss: 0.015579233028822475
    num_agent_steps_sampled: 462000
    num_agent_steps_trained: 462000
    num_steps_sampled: 462000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,462,12279.2,462000,-2.8856,-2.22,-3.66,288.56


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 463000
  custom_metrics: {}
  date: 2021-11-05_15-56-59
  done: false
  episode_len_mean: 293.19
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.931899999999982
  episode_reward_min: -4.209999999999955
  episodes_this_iter: 3
  episodes_total: 1793
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.649699737109138e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.257188159889645
          entropy_coeff: 0.009999999999999998
          kl: 0.012764087332358127
          policy_loss: 0.04668773445818159
          total_loss: 0.04627921026613977
          vf_explained_var: -0.25156253576278687
          vf_loss: 0.012163357426308924
    num_agent_steps_sampled: 463000
    num_agent_steps_trained: 463000
    num_steps_sampled: 463000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,463,12295.9,463000,-2.9319,-2.22,-4.21,293.19


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 464000
  custom_metrics: {}
  date: 2021-11-05_15-57-18
  done: false
  episode_len_mean: 295.9
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.958999999999981
  episode_reward_min: -4.209999999999955
  episodes_this_iter: 2
  episodes_total: 1795
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.649699737109138e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.2970074044333564
          entropy_coeff: 0.009999999999999998
          kl: 0.017902507328328928
          policy_loss: -0.09934283718466759
          total_loss: -0.10182723957631323
          vf_explained_var: 0.2565586268901825
          vf_loss: 0.01048567023438712
    num_agent_steps_sampled: 464000
    num_agent_steps_trained: 464000
    num_steps_sampled: 464000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,464,12314.3,464000,-2.959,-2.22,-4.21,295.9


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 465000
  custom_metrics: {}
  date: 2021-11-05_15-57-37
  done: false
  episode_len_mean: 298.82
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.9881999999999804
  episode_reward_min: -4.209999999999955
  episodes_this_iter: 3
  episodes_total: 1798
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.649699737109138e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.2476305405298869
          entropy_coeff: 0.009999999999999998
          kl: 0.020795971789228224
          policy_loss: -0.09461476703484853
          total_loss: -0.09484164598915312
          vf_explained_var: 0.19668379426002502
          vf_loss: 0.012249423666960663
    num_agent_steps_sampled: 465000
    num_agent_steps_trained: 465000
    num_steps_sampled: 465000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,465,12333.6,465000,-2.9882,-2.22,-4.21,298.82




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 466000
  custom_metrics: {}
  date: 2021-11-05_15-58-15
  done: false
  episode_len_mean: 300.85
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.0084999999999797
  episode_reward_min: -4.209999999999955
  episodes_this_iter: 4
  episodes_total: 1802
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.474549605663707e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.1296665681733025
          entropy_coeff: 0.009999999999999998
          kl: 0.011659726228885168
          policy_loss: 0.032081098109483716
          total_loss: 0.034117542455593744
          vf_explained_var: 0.21486534178256989
          vf_loss: 0.01333310931093163
    num_agent_steps_sampled: 466000
    num_agent_steps_trained: 466000
    num_steps_sampled: 466000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,466,12371.7,466000,-3.0085,-2.22,-4.21,300.85


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 467000
  custom_metrics: {}
  date: 2021-11-05_15-58-35
  done: false
  episode_len_mean: 302.56
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.025599999999979
  episode_reward_min: -4.209999999999955
  episodes_this_iter: 3
  episodes_total: 1805
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.474549605663707e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.1897347503238254
          entropy_coeff: 0.009999999999999998
          kl: 0.07049556906509116
          policy_loss: 0.05248298537400034
          total_loss: 0.05138880502846506
          vf_explained_var: 0.193917378783226
          vf_loss: 0.010803166890723838
    num_agent_steps_sampled: 467000
    num_agent_steps_trained: 467000
    num_steps_sampled: 467000
    num_steps_trained: 467

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,467,12391,467000,-3.0256,-2.22,-4.21,302.56


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 468000
  custom_metrics: {}
  date: 2021-11-05_15-58-53
  done: false
  episode_len_mean: 304.74
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.0473999999999792
  episode_reward_min: -4.209999999999955
  episodes_this_iter: 3
  episodes_total: 1808
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.21182440849556e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.165109051598443
          entropy_coeff: 0.009999999999999998
          kl: 0.010956589265442436
          policy_loss: 0.04139305386278364
          total_loss: 0.04066189378499985
          vf_explained_var: 0.08814980834722519
          vf_loss: 0.010919927063191103
    num_agent_steps_sampled: 468000
    num_agent_steps_trained: 468000
    num_steps_sampled: 468000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,468,12409.6,468000,-3.0474,-2.22,-4.21,304.74


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 469000
  custom_metrics: {}
  date: 2021-11-05_15-59-11
  done: false
  episode_len_mean: 306.87
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.068699999999978
  episode_reward_min: -4.209999999999955
  episodes_this_iter: 2
  episodes_total: 1810
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.21182440849556e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.286893622080485
          entropy_coeff: 0.009999999999999998
          kl: 0.028071043614821732
          policy_loss: -0.07817659791972902
          total_loss: -0.08128635651535458
          vf_explained_var: 0.34549373388290405
          vf_loss: 0.009759177764903548
    num_agent_steps_sampled: 469000
    num_agent_steps_trained: 469000
    num_steps_sampled: 469000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,469,12427.8,469000,-3.0687,-2.22,-4.21,306.87


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 470000
  custom_metrics: {}
  date: 2021-11-05_15-59-29
  done: false
  episode_len_mean: 309.57
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.0956999999999777
  episode_reward_min: -4.279999999999953
  episodes_this_iter: 3
  episodes_total: 1813
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2317736612743334e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.3275819030072955
          entropy_coeff: 0.009999999999999998
          kl: 0.0520176634014199
          policy_loss: 0.061173760973744926
          total_loss: 0.06213175555070241
          vf_explained_var: -0.33875492215156555
          vf_loss: 0.01423381344260027
    num_agent_steps_sampled: 470000
    num_agent_steps_trained: 470000
    num_steps_sampled: 470000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,470,12444.9,470000,-3.0957,-2.22,-4.28,309.57


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 471000
  custom_metrics: {}
  date: 2021-11-05_15-59-44
  done: false
  episode_len_mean: 312.85
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.1284999999999767
  episode_reward_min: -4.399999999999951
  episodes_this_iter: 2
  episodes_total: 1815
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8476604919115006e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.3507319370905557
          entropy_coeff: 0.009999999999999998
          kl: 0.028660452186009402
          policy_loss: -0.07207308030790753
          total_loss: -0.07462939818700155
          vf_explained_var: -0.03767334297299385
          vf_loss: 0.01095100112595699
    num_agent_steps_sampled: 471000
    num_agent_steps_trained: 471000
    num_steps_sampled: 471000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,471,12460.7,471000,-3.1285,-2.22,-4.4,312.85


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 472000
  custom_metrics: {}
  date: 2021-11-05_16-00-00
  done: false
  episode_len_mean: 315.71
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.1570999999999767
  episode_reward_min: -4.819999999999942
  episodes_this_iter: 2
  episodes_total: 1817
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.77149073786725e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.4202677090962728
          entropy_coeff: 0.009999999999999998
          kl: 0.028922088287245726
          policy_loss: -0.14149515098995633
          total_loss: -0.1450696524646547
          vf_explained_var: 0.28732815384864807
          vf_loss: 0.010628176268397106
    num_agent_steps_sampled: 472000
    num_agent_steps_trained: 472000
    num_steps_sampled: 472000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,472,12475.8,472000,-3.1571,-2.22,-4.82,315.71


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 473000
  custom_metrics: {}
  date: 2021-11-05_16-00-16
  done: false
  episode_len_mean: 318.99
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.189899999999975
  episode_reward_min: -4.89999999999994
  episodes_this_iter: 2
  episodes_total: 1819
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.1572361068008773e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.4706815666622586
          entropy_coeff: 0.009999999999999998
          kl: 0.024067805674399166
          policy_loss: -0.05924817522366842
          total_loss: -0.06512754104203648
          vf_explained_var: -0.01859547756612301
          vf_loss: 0.008827449874176333
    num_agent_steps_sampled: 473000
    num_agent_steps_trained: 473000
    num_steps_sampled: 473000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,473,12491.7,473000,-3.1899,-2.22,-4.9,318.99


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 474000
  custom_metrics: {}
  date: 2021-11-05_16-00-32
  done: false
  episode_len_mean: 321.5
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.214999999999975
  episode_reward_min: -4.89999999999994
  episodes_this_iter: 3
  episodes_total: 1822
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.235854160201316e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.5849833620919123
          entropy_coeff: 0.009999999999999998
          kl: 0.012420816793571139
          policy_loss: 0.07122191174162759
          total_loss: 0.06422789676321877
          vf_explained_var: -0.060602184385061264
          vf_loss: 0.008855818750129805
    num_agent_steps_sampled: 474000
    num_agent_steps_trained: 474000
    num_steps_sampled: 474000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,474,12507.7,474000,-3.215,-2.22,-4.9,321.5


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 475000
  custom_metrics: {}
  date: 2021-11-05_16-00-49
  done: false
  episode_len_mean: 323.79
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.2378999999999754
  episode_reward_min: -4.89999999999994
  episodes_this_iter: 2
  episodes_total: 1824
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.235854160201316e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.6146819843186273
          entropy_coeff: 0.009999999999999998
          kl: 0.012760198324664648
          policy_loss: -0.09799622297286988
          total_loss: -0.10522581603791979
          vf_explained_var: 0.40288153290748596
          vf_loss: 0.008917225405780806
    num_agent_steps_sampled: 475000
    num_agent_steps_trained: 475000
    num_steps_sampled: 475000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,475,12525,475000,-3.2379,-2.22,-4.9,323.79


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 476000
  custom_metrics: {}
  date: 2021-11-05_16-01-08
  done: false
  episode_len_mean: 326.44
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.264399999999975
  episode_reward_min: -4.89999999999994
  episodes_this_iter: 3
  episodes_total: 1827
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.235854160201316e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.6607450511720445
          entropy_coeff: 0.009999999999999998
          kl: 0.014872687817912893
          policy_loss: -0.008806319617562824
          total_loss: -0.01748741053872638
          vf_explained_var: 0.4177844524383545
          vf_loss: 0.0079263629236569
    num_agent_steps_sampled: 476000
    num_agent_steps_trained: 476000
    num_steps_sampled: 476000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,476,12544.3,476000,-3.2644,-2.22,-4.9,326.44




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 477000
  custom_metrics: {}
  date: 2021-11-05_16-01-44
  done: false
  episode_len_mean: 328.11
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.281099999999975
  episode_reward_min: -4.89999999999994
  episodes_this_iter: 3
  episodes_total: 1830
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.235854160201316e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.5965289062923855
          entropy_coeff: 0.009999999999999998
          kl: 0.00849741401590263
          policy_loss: -0.0891556743118498
          total_loss: -0.09143605182568232
          vf_explained_var: 0.29384127259254456
          vf_loss: 0.013684913391868274
    num_agent_steps_sampled: 477000
    num_agent_steps_trained: 477000
    num_steps_sampled: 477000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,477,12580.2,477000,-3.2811,-2.22,-4.9,328.11


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 478000
  custom_metrics: {}
  date: 2021-11-05_16-02-05
  done: false
  episode_len_mean: 330.43
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.3042999999999734
  episode_reward_min: -4.89999999999994
  episodes_this_iter: 3
  episodes_total: 1833
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.235854160201316e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.552248271306356
          entropy_coeff: 0.009999999999999998
          kl: 0.013776887804990107
          policy_loss: -0.014030164811346266
          total_loss: -0.021712774369451734
          vf_explained_var: 0.2766515910625458
          vf_loss: 0.00783987386384979
    num_agent_steps_sampled: 478000
    num_agent_steps_trained: 478000
    num_steps_sampled: 478000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,478,12601.1,478000,-3.3043,-2.22,-4.9,330.43


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 479000
  custom_metrics: {}
  date: 2021-11-05_16-02-23
  done: false
  episode_len_mean: 333.21
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.3320999999999725
  episode_reward_min: -4.89999999999994
  episodes_this_iter: 3
  episodes_total: 1836
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.235854160201316e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.6125381098853218
          entropy_coeff: 0.009999999999999998
          kl: 0.011873301668035236
          policy_loss: 0.058109836942619744
          total_loss: 0.053296188596222135
          vf_explained_var: 0.07131797075271606
          vf_loss: 0.011311734567344602
    num_agent_steps_sampled: 479000
    num_agent_steps_trained: 479000
    num_steps_sampled: 479000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,479,12619,479000,-3.3321,-2.22,-4.9,333.21


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 480000
  custom_metrics: {}
  date: 2021-11-05_16-02-41
  done: false
  episode_len_mean: 335.77
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.357699999999972
  episode_reward_min: -4.89999999999994
  episodes_this_iter: 3
  episodes_total: 1839
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.235854160201316e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.581745179494222
          entropy_coeff: 0.009999999999999998
          kl: 0.01621681583002549
          policy_loss: 0.1097525523768531
          total_loss: 0.10310576193862492
          vf_explained_var: 0.33912426233291626
          vf_loss: 0.009170661137128869
    num_agent_steps_sampled: 480000
    num_agent_steps_trained: 480000
    num_steps_sampled: 480000
    num_steps_trained: 4800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,480,12637.4,480000,-3.3577,-2.22,-4.9,335.77


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 481000
  custom_metrics: {}
  date: 2021-11-05_16-03-01
  done: false
  episode_len_mean: 337.34
  episode_media: {}
  episode_reward_max: -2.479999999999991
  episode_reward_mean: -3.3733999999999718
  episode_reward_min: -4.89999999999994
  episodes_this_iter: 3
  episodes_total: 1842
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.235854160201316e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.4269441498650446
          entropy_coeff: 0.009999999999999998
          kl: 0.010380822578104442
          policy_loss: 0.07260704785585403
          total_loss: 0.0707028969294495
          vf_explained_var: 0.2210659682750702
          vf_loss: 0.012365291751403775
    num_agent_steps_sampled: 481000
    num_agent_steps_trained: 481000
    num_steps_sampled: 481000
    num_steps_trained: 481

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,481,12657.1,481000,-3.3734,-2.48,-4.9,337.34


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 482000
  custom_metrics: {}
  date: 2021-11-05_16-03-21
  done: false
  episode_len_mean: 338.83
  episode_media: {}
  episode_reward_max: -2.479999999999991
  episode_reward_mean: -3.388299999999972
  episode_reward_min: -4.89999999999994
  episodes_this_iter: 3
  episodes_total: 1845
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.235854160201316e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.5640735427538555
          entropy_coeff: 0.009999999999999998
          kl: 0.026590346271189544
          policy_loss: 0.04082351815369394
          total_loss: 0.03717737901541922
          vf_explained_var: -0.23257726430892944
          vf_loss: 0.011994593971434774
    num_agent_steps_sampled: 482000
    num_agent_steps_trained: 482000
    num_steps_sampled: 482000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,482,12676.6,482000,-3.3883,-2.48,-4.9,338.83


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 483000
  custom_metrics: {}
  date: 2021-11-05_16-03-39
  done: false
  episode_len_mean: 341.76
  episode_media: {}
  episode_reward_max: -2.479999999999991
  episode_reward_mean: -3.4175999999999713
  episode_reward_min: -4.89999999999994
  episodes_this_iter: 2
  episodes_total: 1847
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.353781240301973e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.4932224286927118
          entropy_coeff: 0.009999999999999998
          kl: 0.014736497243460395
          policy_loss: -0.0953501949707667
          total_loss: -0.09833599362108443
          vf_explained_var: -0.3132988512516022
          vf_loss: 0.01194642583011753
    num_agent_steps_sampled: 483000
    num_agent_steps_trained: 483000
    num_steps_sampled: 483000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,483,12694.5,483000,-3.4176,-2.48,-4.9,341.76


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 484000
  custom_metrics: {}
  date: 2021-11-05_16-03-58
  done: false
  episode_len_mean: 343.92
  episode_media: {}
  episode_reward_max: -2.479999999999991
  episode_reward_mean: -3.43919999999997
  episode_reward_min: -4.89999999999994
  episodes_this_iter: 3
  episodes_total: 1850
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.353781240301973e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.4866279310650297
          entropy_coeff: 0.009999999999999998
          kl: 0.013851984022881918
          policy_loss: -0.08343180368343989
          total_loss: -0.08621766749355528
          vf_explained_var: 0.04814211651682854
          vf_loss: 0.012080417475145724
    num_agent_steps_sampled: 484000
    num_agent_steps_trained: 484000
    num_steps_sampled: 484000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,484,12713.4,484000,-3.4392,-2.48,-4.9,343.92


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 485000
  custom_metrics: {}
  date: 2021-11-05_16-04-20
  done: false
  episode_len_mean: 344.27
  episode_media: {}
  episode_reward_max: -2.479999999999991
  episode_reward_mean: -3.44269999999997
  episode_reward_min: -4.89999999999994
  episodes_this_iter: 4
  episodes_total: 1854
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.353781240301973e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.3922930147912767
          entropy_coeff: 0.009999999999999998
          kl: 0.0208663118707628
          policy_loss: 0.021319609549310472
          total_loss: 0.020287078205082153
          vf_explained_var: 0.2290000468492508
          vf_loss: 0.01289040024081866
    num_agent_steps_sampled: 485000
    num_agent_steps_trained: 485000
    num_steps_sampled: 485000
    num_steps_trained: 48500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,485,12736,485000,-3.4427,-2.48,-4.9,344.27


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 486000
  custom_metrics: {}
  date: 2021-11-05_16-04-40
  done: false
  episode_len_mean: 344.45
  episode_media: {}
  episode_reward_max: -2.479999999999991
  episode_reward_mean: -3.444499999999971
  episode_reward_min: -4.89999999999994
  episodes_this_iter: 3
  episodes_total: 1857
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4030671860452956e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.421186669667562
          entropy_coeff: 0.009999999999999998
          kl: 0.02440838178180379
          policy_loss: -0.08211253426141209
          total_loss: -0.08398195894228087
          vf_explained_var: 0.3384001553058624
          vf_loss: 0.01234244398979677
    num_agent_steps_sampled: 486000
    num_agent_steps_trained: 486000
    num_steps_sampled: 486000
    num_steps_trained: 486

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,486,12756,486000,-3.4445,-2.48,-4.9,344.45




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 487000
  custom_metrics: {}
  date: 2021-11-05_16-05-17
  done: false
  episode_len_mean: 345.58
  episode_media: {}
  episode_reward_max: -2.479999999999991
  episode_reward_mean: -3.45579999999997
  episode_reward_min: -4.89999999999994
  episodes_this_iter: 3
  episodes_total: 1860
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.104600779067944e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.519264272848765
          entropy_coeff: 0.009999999999999998
          kl: 0.011165397929380561
          policy_loss: -0.005905062125788794
          total_loss: -0.009182533456219567
          vf_explained_var: 0.10091277211904526
          vf_loss: 0.011915174165430168
    num_agent_steps_sampled: 487000
    num_agent_steps_trained: 487000
    num_steps_sampled: 487000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,487,12792.4,487000,-3.4558,-2.48,-4.9,345.58


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 488000
  custom_metrics: {}
  date: 2021-11-05_16-05-39
  done: false
  episode_len_mean: 346.12
  episode_media: {}
  episode_reward_max: -2.479999999999991
  episode_reward_mean: -3.46119999999997
  episode_reward_min: -4.89999999999994
  episodes_this_iter: 4
  episodes_total: 1864
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.104600779067944e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.4046102457576328
          entropy_coeff: 0.009999999999999998
          kl: 0.016249673854847893
          policy_loss: 0.024438692298200396
          total_loss: 0.021925784854425326
          vf_explained_var: 0.4234102964401245
          vf_loss: 0.011533196322205994
    num_agent_steps_sampled: 488000
    num_agent_steps_trained: 488000
    num_steps_sampled: 488000
    num_steps_trained: 48

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,488,12815.2,488000,-3.4612,-2.48,-4.9,346.12


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 489000
  custom_metrics: {}
  date: 2021-11-05_16-06-01
  done: false
  episode_len_mean: 345.75
  episode_media: {}
  episode_reward_max: -2.479999999999991
  episode_reward_mean: -3.4574999999999703
  episode_reward_min: -4.89999999999994
  episodes_this_iter: 3
  episodes_total: 1867
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.104600779067944e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.4604831986957125
          entropy_coeff: 0.009999999999999998
          kl: 0.008920368152623344
          policy_loss: 0.04853674901856316
          total_loss: 0.04122044742107391
          vf_explained_var: 0.6578076481819153
          vf_loss: 0.007288530695950613
    num_agent_steps_sampled: 489000
    num_agent_steps_trained: 489000
    num_steps_sampled: 489000
    num_steps_trained: 48

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,489,12836.7,489000,-3.4575,-2.48,-4.9,345.75


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 490000
  custom_metrics: {}
  date: 2021-11-05_16-06-24
  done: false
  episode_len_mean: 345.33
  episode_media: {}
  episode_reward_max: -2.569999999999989
  episode_reward_mean: -3.4532999999999703
  episode_reward_min: -4.89999999999994
  episodes_this_iter: 4
  episodes_total: 1871
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.104600779067944e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.4188382612334358
          entropy_coeff: 0.009999999999999998
          kl: 0.01992830749948321
          policy_loss: 0.026968775110112295
          total_loss: 0.025302802522977192
          vf_explained_var: 0.3712540566921234
          vf_loss: 0.01252240821098288
    num_agent_steps_sampled: 490000
    num_agent_steps_trained: 490000
    num_steps_sampled: 490000
    num_steps_trained: 49

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,490,12859.5,490000,-3.4533,-2.57,-4.9,345.33


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 491000
  custom_metrics: {}
  date: 2021-11-05_16-06-46
  done: false
  episode_len_mean: 344.65
  episode_media: {}
  episode_reward_max: -2.569999999999989
  episode_reward_mean: -3.4464999999999697
  episode_reward_min: -4.89999999999994
  episodes_this_iter: 3
  episodes_total: 1874
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.104600779067944e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.3253740972942776
          entropy_coeff: 0.009999999999999998
          kl: 0.009412787582170602
          policy_loss: 0.020788076519966125
          total_loss: 0.016981165773338743
          vf_explained_var: 0.4246692359447479
          vf_loss: 0.009446831751200887
    num_agent_steps_sampled: 491000
    num_agent_steps_trained: 491000
    num_steps_sampled: 491000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,491,12882.1,491000,-3.4465,-2.57,-4.9,344.65


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 492000
  custom_metrics: {}
  date: 2021-11-05_16-07-10
  done: false
  episode_len_mean: 341.72
  episode_media: {}
  episode_reward_max: -2.4699999999999913
  episode_reward_mean: -3.417199999999971
  episode_reward_min: -4.89999999999994
  episodes_this_iter: 4
  episodes_total: 1878
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.104600779067944e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.0444013675053914
          entropy_coeff: 0.009999999999999998
          kl: 0.012587274308500243
          policy_loss: 0.0025215220120218063
          total_loss: 0.003226242380009757
          vf_explained_var: 0.43218252062797546
          vf_loss: 0.01114873205208116
    num_agent_steps_sampled: 492000
    num_agent_steps_trained: 492000
    num_steps_sampled: 492000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,492,12906.1,492000,-3.4172,-2.47,-4.9,341.72


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 493000
  custom_metrics: {}
  date: 2021-11-05_16-07-34
  done: false
  episode_len_mean: 338.95
  episode_media: {}
  episode_reward_max: -2.4699999999999913
  episode_reward_mean: -3.3894999999999715
  episode_reward_min: -4.89999999999994
  episodes_this_iter: 4
  episodes_total: 1882
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.104600779067944e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.193630215856764
          entropy_coeff: 0.009999999999999998
          kl: 0.037437594756177084
          policy_loss: 0.052459894203477436
          total_loss: 0.0479726599322425
          vf_explained_var: 0.5458253026008606
          vf_loss: 0.007449066553575297
    num_agent_steps_sampled: 493000
    num_agent_steps_trained: 493000
    num_steps_sampled: 493000
    num_steps_trained: 49

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,493,12930.1,493000,-3.3895,-2.47,-4.9,338.95


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 494000
  custom_metrics: {}
  date: 2021-11-05_16-07-58
  done: false
  episode_len_mean: 335.24
  episode_media: {}
  episode_reward_max: -2.4599999999999915
  episode_reward_mean: -3.3523999999999723
  episode_reward_min: -4.89999999999994
  episodes_this_iter: 4
  episodes_total: 1886
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.156901168601916e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.0663116057713826
          entropy_coeff: 0.009999999999999998
          kl: 0.020258406824869173
          policy_loss: 0.041156700005133946
          total_loss: 0.04032171708014276
          vf_explained_var: 0.5295554399490356
          vf_loss: 0.009828132908377382
    num_agent_steps_sampled: 494000
    num_agent_steps_trained: 494000
    num_steps_sampled: 494000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,494,12953.9,494000,-3.3524,-2.46,-4.9,335.24


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 495000
  custom_metrics: {}
  date: 2021-11-05_16-08-14
  done: false
  episode_len_mean: 335.89
  episode_media: {}
  episode_reward_max: -2.4599999999999915
  episode_reward_mean: -3.358899999999972
  episode_reward_min: -4.939999999999939
  episodes_this_iter: 2
  episodes_total: 1888
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.735351752902874e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.1468499614132774
          entropy_coeff: 0.009999999999999998
          kl: 0.03843753098743036
          policy_loss: -0.05574448340468936
          total_loss: -0.05671772642268075
          vf_explained_var: 0.03256822004914284
          vf_loss: 0.010495260146368916
    num_agent_steps_sampled: 495000
    num_agent_steps_trained: 495000
    num_steps_sampled: 495000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,495,12969.7,495000,-3.3589,-2.46,-4.94,335.89




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 496000
  custom_metrics: {}
  date: 2021-11-05_16-08-55
  done: false
  episode_len_mean: 334.05
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.340499999999972
  episode_reward_min: -4.939999999999939
  episodes_this_iter: 4
  episodes_total: 1892
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.10302762935431e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.2188361790445117
          entropy_coeff: 0.009999999999999998
          kl: 0.012419661150622553
          policy_loss: 0.008016195396582286
          total_loss: 0.00588099029329088
          vf_explained_var: 0.5000032782554626
          vf_loss: 0.010053150573124487
    num_agent_steps_sampled: 496000
    num_agent_steps_trained: 496000
    num_steps_sampled: 496000
    num_steps_trained: 49

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,496,13010.9,496000,-3.3405,-2.26,-4.94,334.05


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 497000
  custom_metrics: {}
  date: 2021-11-05_16-09-20
  done: false
  episode_len_mean: 329.09
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.2908999999999735
  episode_reward_min: -4.939999999999939
  episodes_this_iter: 4
  episodes_total: 1896
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.10302762935431e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.145950088236067
          entropy_coeff: 0.009999999999999998
          kl: 0.0152621097998461
          policy_loss: -0.010077217138475841
          total_loss: -0.011420774873760012
          vf_explained_var: 0.3840169310569763
          vf_loss: 0.010115944169875648
    num_agent_steps_sampled: 497000
    num_agent_steps_trained: 497000
    num_steps_sampled: 497000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,497,13035.2,497000,-3.2909,-2.26,-4.94,329.09


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 498000
  custom_metrics: {}
  date: 2021-11-05_16-09-44
  done: false
  episode_len_mean: 325.94
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.259399999999975
  episode_reward_min: -4.939999999999939
  episodes_this_iter: 4
  episodes_total: 1900
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.10302762935431e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.0438195758395725
          entropy_coeff: 0.009999999999999998
          kl: 0.04160755308340924
          policy_loss: 0.031193056537045374
          total_loss: 0.0292283167441686
          vf_explained_var: 0.5288776755332947
          vf_loss: 0.008473455600647464
    num_agent_steps_sampled: 498000
    num_agent_steps_trained: 498000
    num_steps_sampled: 498000
    num_steps_trained: 4980

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,498,13060,498000,-3.2594,-2.26,-4.94,325.94


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 499000
  custom_metrics: {}
  date: 2021-11-05_16-10-01
  done: false
  episode_len_mean: 327.26
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.2725999999999744
  episode_reward_min: -4.939999999999939
  episodes_this_iter: 2
  episodes_total: 1902
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0654541444031467e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.004494391547309
          entropy_coeff: 0.009999999999999998
          kl: 0.022900891901185202
          policy_loss: -0.03479831318060557
          total_loss: -0.03312109642558628
          vf_explained_var: 0.12110293656587601
          vf_loss: 0.011722160338993287
    num_agent_steps_sampled: 499000
    num_agent_steps_trained: 499000
    num_steps_sampled: 499000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,499,13076.7,499000,-3.2726,-2.26,-4.94,327.26


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 500000
  custom_metrics: {}
  date: 2021-11-05_16-10-15
  done: false
  episode_len_mean: 331.74
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.3173999999999744
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 2
  episodes_total: 1904
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5981812166047197e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.7474776003095839
          entropy_coeff: 0.009999999999999998
          kl: 0.022675252290163057
          policy_loss: -0.09245221134689119
          total_loss: -0.08747659507724974
          vf_explained_var: -0.29315903782844543
          vf_loss: 0.012450390103428315
    num_agent_steps_sampled: 500000
    num_agent_steps_trained: 500000
    num_steps_sampled: 500000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,500,13090,500000,-3.3174,-2.26,-5.83,331.74


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 501000
  custom_metrics: {}
  date: 2021-11-05_16-10-32
  done: false
  episode_len_mean: 332.92
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.3291999999999735
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 3
  episodes_total: 1907
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.39727182490708e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.0235518561469183
          entropy_coeff: 0.009999999999999998
          kl: 0.021521796247163473
          policy_loss: 0.04857984036207199
          total_loss: 0.050304485029644436
          vf_explained_var: -0.16984045505523682
          vf_loss: 0.011960160300239093
    num_agent_steps_sampled: 501000
    num_agent_steps_trained: 501000
    num_steps_sampled: 501000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,501,13107.2,501000,-3.3292,-2.26,-5.83,332.92


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 502000
  custom_metrics: {}
  date: 2021-11-05_16-10-52
  done: false
  episode_len_mean: 330.81
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.308099999999974
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 3
  episodes_total: 1910
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.59590773736062e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.1962521447075738
          entropy_coeff: 0.009999999999999998
          kl: 0.012111803931230567
          policy_loss: 0.019821236613723967
          total_loss: 0.02063335060245461
          vf_explained_var: 0.005273132119327784
          vf_loss: 0.012774637170756857
    num_agent_steps_sampled: 502000
    num_agent_steps_trained: 502000
    num_steps_sampled: 502000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,502,13127.2,502000,-3.3081,-2.26,-5.83,330.81


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 503000
  custom_metrics: {}
  date: 2021-11-05_16-11-07
  done: false
  episode_len_mean: 331.59
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.315899999999973
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 2
  episodes_total: 1912
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.59590773736062e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.0515922009944916
          entropy_coeff: 0.009999999999999998
          kl: 0.013252931600906534
          policy_loss: -0.0839572454492251
          total_loss: -0.0813037011358473
          vf_explained_var: -0.20305587351322174
          vf_loss: 0.013169465141577854
    num_agent_steps_sampled: 503000
    num_agent_steps_trained: 503000
    num_steps_sampled: 503000
    num_steps_trained: 50

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,503,13142.2,503000,-3.3159,-2.26,-5.83,331.59


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 504000
  custom_metrics: {}
  date: 2021-11-05_16-11-22
  done: false
  episode_len_mean: 334.1
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.340999999999973
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 2
  episodes_total: 1914
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.59590773736062e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.9511104418171776
          entropy_coeff: 0.009999999999999998
          kl: 0.00913560076557146
          policy_loss: -0.09030987521012625
          total_loss: -0.08665024307039049
          vf_explained_var: -0.2971719801425934
          vf_loss: 0.013170734321465715
    num_agent_steps_sampled: 504000
    num_agent_steps_trained: 504000
    num_steps_sampled: 504000
    num_steps_trained: 504

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,504,13157.5,504000,-3.341,-2.26,-5.83,334.1


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 505000
  custom_metrics: {}
  date: 2021-11-05_16-11-36
  done: false
  episode_len_mean: 333.55
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.335499999999973
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 2
  episodes_total: 1916
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.59590773736062e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.0235876626438565
          entropy_coeff: 0.009999999999999998
          kl: 0.01894506545569254
          policy_loss: -0.07455249710215463
          total_loss: -0.07337084296676848
          vf_explained_var: -0.19642393290996552
          vf_loss: 0.01141752850704102
    num_agent_steps_sampled: 505000
    num_agent_steps_trained: 505000
    num_steps_sampled: 505000
    num_steps_trained: 50

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,505,13171.8,505000,-3.3355,-2.26,-5.83,333.55




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 506000
  custom_metrics: {}
  date: 2021-11-05_16-12-13
  done: false
  episode_len_mean: 329.71
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.2970999999999733
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 4
  episodes_total: 1920
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.59590773736062e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.0807318925857543
          entropy_coeff: 0.009999999999999998
          kl: 0.027044075343867192
          policy_loss: -0.008849617507722642
          total_loss: -0.0037961653537220423
          vf_explained_var: 0.0519227460026741
          vf_loss: 0.01586077134642336
    num_agent_steps_sampled: 506000
    num_agent_steps_trained: 506000
    num_steps_sampled: 506000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,506,13208.3,506000,-3.2971,-2.26,-5.83,329.71


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 507000
  custom_metrics: {}
  date: 2021-11-05_16-12-31
  done: false
  episode_len_mean: 329.66
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.2965999999999736
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 2
  episodes_total: 1922
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.3938616060409297e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.143528291914198
          entropy_coeff: 0.009999999999999998
          kl: 0.026419174347113345
          policy_loss: 0.0648691616124577
          total_loss: 0.05906462868054708
          vf_explained_var: -0.48456230759620667
          vf_loss: 0.005630747162892173
    num_agent_steps_sampled: 507000
    num_agent_steps_trained: 507000
    num_steps_sampled: 507000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,507,13226,507000,-3.2966,-2.26,-5.83,329.66


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 508000
  custom_metrics: {}
  date: 2021-11-05_16-12-48
  done: false
  episode_len_mean: 329.28
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.2927999999999735
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 3
  episodes_total: 1925
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.090792409061393e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.1891136527061463
          entropy_coeff: 0.009999999999999998
          kl: 0.012962372312708779
          policy_loss: 0.05785580575466156
          total_loss: 0.05718736367093192
          vf_explained_var: 0.01352713629603386
          vf_loss: 0.011222694151284587
    num_agent_steps_sampled: 508000
    num_agent_steps_trained: 508000
    num_steps_sampled: 508000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,508,13243.4,508000,-3.2928,-2.26,-5.83,329.28


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 509000
  custom_metrics: {}
  date: 2021-11-05_16-13-06
  done: false
  episode_len_mean: 329.81
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.298099999999973
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 2
  episodes_total: 1927
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.090792409061393e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.2780551685227288
          entropy_coeff: 0.009999999999999998
          kl: 0.013193673848277894
          policy_loss: -0.08659898208247291
          total_loss: -0.086509503920873
          vf_explained_var: -0.0839468464255333
          vf_loss: 0.012870031353991685
    num_agent_steps_sampled: 509000
    num_agent_steps_trained: 509000
    num_steps_sampled: 509000
    num_steps_trained: 50

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,509,13261.3,509000,-3.2981,-2.26,-5.83,329.81


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 510000
  custom_metrics: {}
  date: 2021-11-05_16-13-27
  done: false
  episode_len_mean: 328.98
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.289799999999973
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 3
  episodes_total: 1930
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.090792409061393e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.2256259799003602
          entropy_coeff: 0.009999999999999998
          kl: 0.013688211387434635
          policy_loss: -0.10197306101520856
          total_loss: -0.09834157485100958
          vf_explained_var: 0.1983439326286316
          vf_loss: 0.01588774433152543
    num_agent_steps_sampled: 510000
    num_agent_steps_trained: 510000
    num_steps_sampled: 510000
    num_steps_trained: 51

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,510,13281.8,510000,-3.2898,-2.26,-5.83,328.98


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 511000
  custom_metrics: {}
  date: 2021-11-05_16-13-45
  done: false
  episode_len_mean: 328.69
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.286899999999973
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 3
  episodes_total: 1933
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.090792409061393e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.210355477862888
          entropy_coeff: 0.009999999999999998
          kl: 0.020212075015787593
          policy_loss: -0.09071173949374092
          total_loss: -0.08706930395629671
          vf_explained_var: 0.0816020667552948
          vf_loss: 0.015745989491956103
    num_agent_steps_sampled: 511000
    num_agent_steps_trained: 511000
    num_steps_sampled: 511000
    num_steps_trained: 51

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,511,13300.4,511000,-3.2869,-2.26,-5.83,328.69


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 512000
  custom_metrics: {}
  date: 2021-11-05_16-14-02
  done: false
  episode_len_mean: 329.23
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.2922999999999734
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 3
  episodes_total: 1936
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2136188613592092e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.2994182507197063
          entropy_coeff: 0.009999999999999998
          kl: 0.010930295327033454
          policy_loss: 0.014552529984050327
          total_loss: 0.0139818474650383
          vf_explained_var: -0.09206145256757736
          vf_loss: 0.012423498795316037
    num_agent_steps_sampled: 512000
    num_agent_steps_trained: 512000
    num_steps_sampled: 512000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,512,13317.6,512000,-3.2923,-2.26,-5.83,329.23


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 513000
  custom_metrics: {}
  date: 2021-11-05_16-14-23
  done: false
  episode_len_mean: 328.93
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.289299999999974
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 3
  episodes_total: 1939
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2136188613592092e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.404816296365526
          entropy_coeff: 0.009999999999999998
          kl: 0.012341966660931231
          policy_loss: 0.04029474953810374
          total_loss: 0.034612071182992726
          vf_explained_var: -0.3771614134311676
          vf_loss: 0.00836548444090618
    num_agent_steps_sampled: 513000
    num_agent_steps_trained: 513000
    num_steps_sampled: 513000
    num_steps_trained: 51

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,513,13337.8,513000,-3.2893,-2.26,-5.83,328.93


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 514000
  custom_metrics: {}
  date: 2021-11-05_16-14-44
  done: false
  episode_len_mean: 327.74
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.277399999999974
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 3
  episodes_total: 1942
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2136188613592092e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.30456127193239
          entropy_coeff: 0.009999999999999998
          kl: 0.01569588714404012
          policy_loss: -0.10674090070856943
          total_loss: -0.10377437306774986
          vf_explained_var: 0.12212246656417847
          vf_loss: 0.016012143840392432
    num_agent_steps_sampled: 514000
    num_agent_steps_trained: 514000
    num_steps_sampled: 514000
    num_steps_trained: 51

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,514,13359.4,514000,-3.2774,-2.26,-5.83,327.74


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 515000
  custom_metrics: {}
  date: 2021-11-05_16-15-07
  done: false
  episode_len_mean: 324.7
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.246999999999974
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 4
  episodes_total: 1946
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2136188613592092e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.259361457824707
          entropy_coeff: 0.009999999999999998
          kl: 0.01854650447170331
          policy_loss: 0.004011610605650478
          total_loss: 0.005931193505724271
          vf_explained_var: 0.1008041575551033
          vf_loss: 0.014513195513023271
    num_agent_steps_sampled: 515000
    num_agent_steps_trained: 515000
    num_steps_sampled: 515000
    num_steps_trained: 515

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,515,13381.9,515000,-3.247,-2.26,-5.83,324.7




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 516000
  custom_metrics: {}
  date: 2021-11-05_16-15-47
  done: false
  episode_len_mean: 321.23
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.212299999999976
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 4
  episodes_total: 1950
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2136188613592092e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.2947798556751675
          entropy_coeff: 0.009999999999999998
          kl: 0.01332903077647024
          policy_loss: 0.01344433430996206
          total_loss: 0.013710962815417184
          vf_explained_var: 0.32065168023109436
          vf_loss: 0.01321442678777708
    num_agent_steps_sampled: 516000
    num_agent_steps_trained: 516000
    num_steps_sampled: 516000
    num_steps_trained: 51

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,516,13421.7,516000,-3.2123,-2.26,-5.83,321.23


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 517000
  custom_metrics: {}
  date: 2021-11-05_16-16-09
  done: false
  episode_len_mean: 321.88
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.2187999999999755
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 3
  episodes_total: 1953
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2136188613592092e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.2825933284229702
          entropy_coeff: 0.009999999999999998
          kl: 0.008887170061952235
          policy_loss: -0.08974316401614083
          total_loss: -0.08795905171169174
          vf_explained_var: 0.277187317609787
          vf_loss: 0.014610042391965786
    num_agent_steps_sampled: 517000
    num_agent_steps_trained: 517000
    num_steps_sampled: 517000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,517,13443.5,517000,-3.2188,-2.26,-5.83,321.88


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 518000
  custom_metrics: {}
  date: 2021-11-05_16-16-30
  done: false
  episode_len_mean: 321.54
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.2153999999999754
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 4
  episodes_total: 1957
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2136188613592092e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.2822703798611959
          entropy_coeff: 0.009999999999999998
          kl: 0.016780810949496013
          policy_loss: -0.009025082985560099
          total_loss: -0.008747979170746274
          vf_explained_var: 0.3806195855140686
          vf_loss: 0.013099807242138519
    num_agent_steps_sampled: 518000
    num_agent_steps_trained: 518000
    num_steps_sampled: 518000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,518,13465.3,518000,-3.2154,-2.26,-5.83,321.54


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 519000
  custom_metrics: {}
  date: 2021-11-05_16-16-54
  done: false
  episode_len_mean: 319.08
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.190799999999976
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 4
  episodes_total: 1961
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2136188613592092e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.1087799158361222
          entropy_coeff: 0.009999999999999998
          kl: 0.04996894348456647
          policy_loss: 0.002928995092709859
          total_loss: 0.0023932127488984004
          vf_explained_var: 0.5344576239585876
          vf_loss: 0.010552015569474962
    num_agent_steps_sampled: 519000
    num_agent_steps_trained: 519000
    num_steps_sampled: 519000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,519,13489.3,519000,-3.1908,-2.26,-5.83,319.08


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 520000
  custom_metrics: {}
  date: 2021-11-05_16-17-18
  done: false
  episode_len_mean: 317.71
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.177099999999977
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 3
  episodes_total: 1964
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.820428292038814e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.0638911267121633
          entropy_coeff: 0.009999999999999998
          kl: 0.00831186031467897
          policy_loss: -0.1061290511654483
          total_loss: -0.1073533838821782
          vf_explained_var: 0.5536532998085022
          vf_loss: 0.009414580614409513
    num_agent_steps_sampled: 520000
    num_agent_steps_trained: 520000
    num_steps_sampled: 520000
    num_steps_trained: 5200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,520,13513.4,520000,-3.1771,-2.26,-5.83,317.71


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 521000
  custom_metrics: {}
  date: 2021-11-05_16-17-42
  done: false
  episode_len_mean: 316.66
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.1665999999999768
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 4
  episodes_total: 1968
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.820428292038814e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.0651198625564575
          entropy_coeff: 0.009999999999999998
          kl: 0.028672468400547442
          policy_loss: 0.017058587736553617
          total_loss: 0.01531201344397333
          vf_explained_var: 0.5796854496002197
          vf_loss: 0.008904621719072263
    num_agent_steps_sampled: 521000
    num_agent_steps_trained: 521000
    num_steps_sampled: 521000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,521,13536.7,521000,-3.1666,-2.26,-5.83,316.66


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 522000
  custom_metrics: {}
  date: 2021-11-05_16-18-01
  done: false
  episode_len_mean: 317.94
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.1793999999999754
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 3
  episodes_total: 1971
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.730642438058222e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.053535062736935
          entropy_coeff: 0.009999999999999998
          kl: 0.02167172502627821
          policy_loss: -0.08850477627582021
          total_loss: -0.08321820836928155
          vf_explained_var: 0.1964995265007019
          vf_loss: 0.01582191560624374
    num_agent_steps_sampled: 522000
    num_agent_steps_trained: 522000
    num_steps_sampled: 522000
    num_steps_trained: 522

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,522,13556,522000,-3.1794,-2.26,-5.83,317.94


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 523000
  custom_metrics: {}
  date: 2021-11-05_16-18-16
  done: false
  episode_len_mean: 321.11
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.2110999999999756
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 2
  episodes_total: 1973
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.095963657087331e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.040884561671151
          entropy_coeff: 0.009999999999999998
          kl: 0.02285666166310065
          policy_loss: -0.10059908562236362
          total_loss: -0.09971307516098023
          vf_explained_var: 0.16799192130565643
          vf_loss: 0.011294853173765457
    num_agent_steps_sampled: 523000
    num_agent_steps_trained: 523000
    num_steps_sampled: 523000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,523,13571.1,523000,-3.2111,-2.26,-5.83,321.11


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 524000
  custom_metrics: {}
  date: 2021-11-05_16-18-34
  done: false
  episode_len_mean: 323.93
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.2392999999999743
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 3
  episodes_total: 1976
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.143945485630997e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.0620434761047364
          entropy_coeff: 0.009999999999999998
          kl: 0.008110314195329751
          policy_loss: -0.08846396812134319
          total_loss: -0.08396920354829894
          vf_explained_var: 0.09185893088579178
          vf_loss: 0.015115197334024642
    num_agent_steps_sampled: 524000
    num_agent_steps_trained: 524000
    num_steps_sampled: 524000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,524,13588.6,524000,-3.2393,-2.26,-5.83,323.93


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 525000
  custom_metrics: {}
  date: 2021-11-05_16-18-51
  done: false
  episode_len_mean: 328.72
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.287199999999973
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 3
  episodes_total: 1979
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.143945485630997e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.0867143670717876
          entropy_coeff: 0.009999999999999998
          kl: 0.010577429952722244
          policy_loss: 0.05751318815681669
          total_loss: 0.05788924396038055
          vf_explained_var: -0.1976494938135147
          vf_loss: 0.011243196085949118
    num_agent_steps_sampled: 525000
    num_agent_steps_trained: 525000
    num_steps_sampled: 525000
    num_steps_trained: 52

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,525,13605.4,525000,-3.2872,-2.26,-5.83,328.72




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 526000
  custom_metrics: {}
  date: 2021-11-05_16-19-28
  done: false
  episode_len_mean: 330.48
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.304799999999973
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 3
  episodes_total: 1982
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.143945485630997e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.2695210576057434
          entropy_coeff: 0.009999999999999998
          kl: 0.017900209280017017
          policy_loss: 0.05539758006731669
          total_loss: 0.05335667033990224
          vf_explained_var: -0.2015843242406845
          vf_loss: 0.01065430250665587
    num_agent_steps_sampled: 526000
    num_agent_steps_trained: 526000
    num_steps_sampled: 526000
    num_steps_trained: 526

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,526,13642.7,526000,-3.3048,-2.26,-5.83,330.48


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 527000
  custom_metrics: {}
  date: 2021-11-05_16-19-49
  done: false
  episode_len_mean: 332.15
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.3214999999999737
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 3
  episodes_total: 1985
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.143945485630997e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.253577901257409
          entropy_coeff: 0.009999999999999998
          kl: 0.020628647355330307
          policy_loss: 0.041681204239527384
          total_loss: 0.039663944807317525
          vf_explained_var: -0.11575508117675781
          vf_loss: 0.010518520448305127
    num_agent_steps_sampled: 527000
    num_agent_steps_trained: 527000
    num_steps_sampled: 527000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,527,13663.4,527000,-3.3215,-2.26,-5.83,332.15


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 528000
  custom_metrics: {}
  date: 2021-11-05_16-20-11
  done: false
  episode_len_mean: 328.6
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.2859999999999734
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 4
  episodes_total: 1989
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.215918228446495e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.2636828025182087
          entropy_coeff: 0.009999999999999998
          kl: 0.011350108064060121
          policy_loss: 0.00993059016764164
          total_loss: 0.007798106802834405
          vf_explained_var: 0.41521236300468445
          vf_loss: 0.010504346325372655
    num_agent_steps_sampled: 528000
    num_agent_steps_trained: 528000
    num_steps_sampled: 528000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,528,13686.2,528000,-3.286,-2.26,-5.83,328.6


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 529000
  custom_metrics: {}
  date: 2021-11-05_16-20-34
  done: false
  episode_len_mean: 328.91
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -3.2890999999999737
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 3
  episodes_total: 1992
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.215918228446495e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.2570140480995178
          entropy_coeff: 0.009999999999999998
          kl: 0.010599512912955491
          policy_loss: -0.012910640322499806
          total_loss: -0.015417605886856716
          vf_explained_var: 0.4565823972225189
          vf_loss: 0.010063171472089986
    num_agent_steps_sampled: 529000
    num_agent_steps_trained: 529000
    num_steps_sampled: 529000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,529,13709,529000,-3.2891,-2.36,-5.83,328.91


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 530000
  custom_metrics: {}
  date: 2021-11-05_16-20-57
  done: false
  episode_len_mean: 329.62
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -3.296199999999974
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 4
  episodes_total: 1996
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.215918228446495e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.329765212535858
          entropy_coeff: 0.009999999999999998
          kl: 0.03062765305088612
          policy_loss: 0.04619376580748293
          total_loss: 0.041836727865868145
          vf_explained_var: 0.6083846688270569
          vf_loss: 0.008940613575072752
    num_agent_steps_sampled: 530000
    num_agent_steps_trained: 530000
    num_steps_sampled: 530000
    num_steps_trained: 530

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,530,13731.8,530000,-3.2962,-2.36,-5.83,329.62


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 531000
  custom_metrics: {}
  date: 2021-11-05_16-21-20
  done: false
  episode_len_mean: 330.78
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -3.3077999999999737
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 4
  episodes_total: 2000
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3823877342669744e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.2622911956575182
          entropy_coeff: 0.009999999999999998
          kl: 0.01237668308832285
          policy_loss: 0.0032548896968364717
          total_loss: 0.00033630381027857463
          vf_explained_var: 0.5661196112632751
          vf_loss: 0.009704328116236461
    num_agent_steps_sampled: 531000
    num_agent_steps_trained: 531000
    num_steps_sampled: 531000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,531,13754.7,531000,-3.3078,-2.36,-5.83,330.78


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 532000
  custom_metrics: {}
  date: 2021-11-05_16-21-43
  done: false
  episode_len_mean: 326.56
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -3.2655999999999743
  episode_reward_min: -5.82999999999992
  episodes_this_iter: 3
  episodes_total: 2003
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3823877342669744e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.3212039020326403
          entropy_coeff: 0.009999999999999998
          kl: 0.012083667444538114
          policy_loss: -0.017831690278318192
          total_loss: -0.023827176292737326
          vf_explained_var: 0.6756712794303894
          vf_loss: 0.007216549162856406
    num_agent_steps_sampled: 532000
    num_agent_steps_trained: 532000
    num_steps_sampled: 532000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,532,13777.8,532000,-3.2656,-2.36,-5.83,326.56


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 533000
  custom_metrics: {}
  date: 2021-11-05_16-22-05
  done: false
  episode_len_mean: 319.99
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -3.1998999999999755
  episode_reward_min: -5.469999999999928
  episodes_this_iter: 4
  episodes_total: 2007
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3823877342669744e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.2728997985521953
          entropy_coeff: 0.009999999999999998
          kl: 0.017346074562363374
          policy_loss: 0.0948638907323281
          total_loss: 0.0926292729874452
          vf_explained_var: 0.6153889894485474
          vf_loss: 0.010494378923128048
    num_agent_steps_sampled: 533000
    num_agent_steps_trained: 533000
    num_steps_sampled: 533000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,533,13799.8,533000,-3.1999,-2.36,-5.47,319.99




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 534000
  custom_metrics: {}
  date: 2021-11-05_16-22-44
  done: false
  episode_len_mean: 319.69
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -3.196899999999975
  episode_reward_min: -5.469999999999928
  episodes_this_iter: 3
  episodes_total: 2010
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3823877342669744e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.237037256691191
          entropy_coeff: 0.009999999999999998
          kl: 0.025661542253452317
          policy_loss: -0.06355331800878047
          total_loss: -0.06253083876023689
          vf_explained_var: 0.4683665335178375
          vf_loss: 0.013392854306019015
    num_agent_steps_sampled: 534000
    num_agent_steps_trained: 534000
    num_steps_sampled: 534000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,534,13839,534000,-3.1969,-2.36,-5.47,319.69


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 535000
  custom_metrics: {}
  date: 2021-11-05_16-23-09
  done: false
  episode_len_mean: 311.33
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -3.1132999999999775
  episode_reward_min: -5.469999999999928
  episodes_this_iter: 4
  episodes_total: 2014
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0735816014004617e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.1161683473322126
          entropy_coeff: 0.009999999999999998
          kl: 0.011916267068568548
          policy_loss: -0.000360215703646342
          total_loss: 0.0012210499495267867
          vf_explained_var: 0.4201386868953705
          vf_loss: 0.012742950601710214
    num_agent_steps_sampled: 535000
    num_agent_steps_trained: 535000
    num_steps_sampled: 535000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,535,13863.5,535000,-3.1133,-2.36,-5.47,311.33


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 536000
  custom_metrics: {}
  date: 2021-11-05_16-23-32
  done: false
  episode_len_mean: 305.36
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -3.053599999999978
  episode_reward_min: -4.649999999999945
  episodes_this_iter: 4
  episodes_total: 2018
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0735816014004617e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.2603562951087952
          entropy_coeff: 0.009999999999999998
          kl: 0.014388249265399406
          policy_loss: -0.015541172607077493
          total_loss: -0.01894332625799709
          vf_explained_var: 0.48388969898223877
          vf_loss: 0.009201409191721015
    num_agent_steps_sampled: 536000
    num_agent_steps_trained: 536000
    num_steps_sampled: 536000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,536,13886.9,536000,-3.0536,-2.36,-4.65,305.36


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 537000
  custom_metrics: {}
  date: 2021-11-05_16-23-56
  done: false
  episode_len_mean: 301.62
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -3.016199999999979
  episode_reward_min: -4.649999999999945
  episodes_this_iter: 4
  episodes_total: 2022
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0735816014004617e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.0216650419765048
          entropy_coeff: 0.009999999999999998
          kl: 0.01829490687228806
          policy_loss: 0.003846158252822028
          total_loss: 0.003673891888724433
          vf_explained_var: 0.5520312190055847
          vf_loss: 0.010044384240690206
    num_agent_steps_sampled: 537000
    num_agent_steps_trained: 537000
    num_steps_sampled: 537000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,537,13910.8,537000,-3.0162,-2.36,-4.65,301.62


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 538000
  custom_metrics: {}
  date: 2021-11-05_16-24-19
  done: false
  episode_len_mean: 298.17
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -2.9816999999999796
  episode_reward_min: -4.649999999999945
  episodes_this_iter: 3
  episodes_total: 2025
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0735816014004617e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.0919905808236865
          entropy_coeff: 0.009999999999999998
          kl: 0.02591079180946419
          policy_loss: -0.0017889954149723053
          total_loss: -0.003475178778171539
          vf_explained_var: 0.2819443941116333
          vf_loss: 0.009233722209723459
    num_agent_steps_sampled: 538000
    num_agent_steps_trained: 538000
    num_steps_sampled: 538000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,538,13933.6,538000,-2.9817,-2.36,-4.65,298.17


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 539000
  custom_metrics: {}
  date: 2021-11-05_16-24-42
  done: false
  episode_len_mean: 295.04
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -2.9503999999999815
  episode_reward_min: -4.649999999999945
  episodes_this_iter: 4
  episodes_total: 2029
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.110372402100693e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9740843143728044
          entropy_coeff: 0.009999999999999998
          kl: 0.009161400208720968
          policy_loss: 0.05405357744958666
          total_loss: 0.05483134508960777
          vf_explained_var: 0.3847561180591583
          vf_loss: 0.010518610953456825
    num_agent_steps_sampled: 539000
    num_agent_steps_trained: 539000
    num_steps_sampled: 539000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,539,13956.2,539000,-2.9504,-2.36,-4.65,295.04


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 540000
  custom_metrics: {}
  date: 2021-11-05_16-25-05
  done: false
  episode_len_mean: 293.01
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -2.9300999999999813
  episode_reward_min: -4.649999999999945
  episodes_this_iter: 4
  episodes_total: 2033
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.110372402100693e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.1130528204970889
          entropy_coeff: 0.009999999999999998
          kl: 0.015189575687492468
          policy_loss: 0.043195434328582555
          total_loss: 0.041136105358600614
          vf_explained_var: 0.40200984477996826
          vf_loss: 0.00907119693648484
    num_agent_steps_sampled: 540000
    num_agent_steps_trained: 540000
    num_steps_sampled: 540000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,540,13979.6,540000,-2.9301,-2.36,-4.65,293.01


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 541000
  custom_metrics: {}
  date: 2021-11-05_16-25-29
  done: false
  episode_len_mean: 289.75
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -2.8974999999999818
  episode_reward_min: -4.649999999999945
  episodes_this_iter: 3
  episodes_total: 2036
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.110372402100693e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9379008736875322
          entropy_coeff: 0.009999999999999998
          kl: 0.028872404864422347
          policy_loss: -0.09521589908334945
          total_loss: -0.0934697785311275
          vf_explained_var: 0.2322780042886734
          vf_loss: 0.011125128478225735
    num_agent_steps_sampled: 541000
    num_agent_steps_trained: 541000
    num_steps_sampled: 541000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,541,14003.5,541000,-2.8975,-2.36,-4.65,289.75




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 542000
  custom_metrics: {}
  date: 2021-11-05_16-26-08
  done: false
  episode_len_mean: 286.96
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -2.869599999999983
  episode_reward_min: -4.649999999999945
  episodes_this_iter: 4
  episodes_total: 2040
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.665558603151038e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.6468284726142883
          entropy_coeff: 0.009999999999999998
          kl: 0.0071436969878558585
          policy_loss: 0.010442102866040335
          total_loss: 0.016706932915581597
          vf_explained_var: 0.11107103526592255
          vf_loss: 0.012733113455275695
    num_agent_steps_sampled: 542000
    num_agent_steps_trained: 542000
    num_steps_sampled: 542000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,542,14042.4,542000,-2.8696,-2.36,-4.65,286.96


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 543000
  custom_metrics: {}
  date: 2021-11-05_16-26-33
  done: false
  episode_len_mean: 285.87
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -2.858699999999983
  episode_reward_min: -4.649999999999945
  episodes_this_iter: 4
  episodes_total: 2044
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.665558603151038e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.7410778284072876
          entropy_coeff: 0.009999999999999998
          kl: 0.011444891537085135
          policy_loss: 0.021359107312228946
          total_loss: 0.02295091234975391
          vf_explained_var: 0.2252224236726761
          vf_loss: 0.009002582185591261
    num_agent_steps_sampled: 543000
    num_agent_steps_trained: 543000
    num_steps_sampled: 543000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,543,14067.6,543000,-2.8587,-2.36,-4.65,285.87


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 544000
  custom_metrics: {}
  date: 2021-11-05_16-26-54
  done: false
  episode_len_mean: 287.07
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -2.8706999999999816
  episode_reward_min: -4.649999999999945
  episodes_this_iter: 3
  episodes_total: 2047
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.665558603151038e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8021812677383423
          entropy_coeff: 0.009999999999999998
          kl: 0.01658949113230166
          policy_loss: -0.037255208277040056
          total_loss: -0.03538881060149934
          vf_explained_var: 0.15536198019981384
          vf_loss: 0.009888210147619248
    num_agent_steps_sampled: 544000
    num_agent_steps_trained: 544000
    num_steps_sampled: 544000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,544,14087.8,544000,-2.8707,-2.36,-4.65,287.07


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 545000
  custom_metrics: {}
  date: 2021-11-05_16-27-16
  done: false
  episode_len_mean: 287.99
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -2.8798999999999824
  episode_reward_min: -4.649999999999945
  episodes_this_iter: 4
  episodes_total: 2051
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.665558603151038e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.591561492284139
          entropy_coeff: 0.009999999999999998
          kl: 0.011210678889419986
          policy_loss: -0.0010830203174716896
          total_loss: 0.006522707278943724
          vf_explained_var: 0.04724232479929924
          vf_loss: 0.013521341658714745
    num_agent_steps_sampled: 545000
    num_agent_steps_trained: 545000
    num_steps_sampled: 545000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,545,14109.8,545000,-2.8799,-2.37,-4.65,287.99


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 546000
  custom_metrics: {}
  date: 2021-11-05_16-27-38
  done: false
  episode_len_mean: 287.34
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -2.8733999999999833
  episode_reward_min: -4.649999999999945
  episodes_this_iter: 3
  episodes_total: 2054
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.665558603151038e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.859948335091273
          entropy_coeff: 0.009999999999999998
          kl: 0.05066023036215233
          policy_loss: 0.03833718622724215
          total_loss: 0.03823232857717408
          vf_explained_var: 0.2865773141384125
          vf_loss: 0.008494623822884427
    num_agent_steps_sampled: 546000
    num_agent_steps_trained: 546000
    num_steps_sampled: 546000
    num_steps_trained: 54

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,546,14131.9,546000,-2.8734,-2.37,-4.65,287.34


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 547000
  custom_metrics: {}
  date: 2021-11-05_16-28-01
  done: false
  episode_len_mean: 286.93
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -2.869299999999983
  episode_reward_min: -4.649999999999945
  episodes_this_iter: 4
  episodes_total: 2058
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.998337904726558e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9761048131518894
          entropy_coeff: 0.009999999999999998
          kl: 0.036653329981488104
          policy_loss: 0.02654611079229249
          total_loss: 0.02808985101679961
          vf_explained_var: 0.2682042717933655
          vf_loss: 0.011304787204911312
    num_agent_steps_sampled: 547000
    num_agent_steps_trained: 547000
    num_steps_sampled: 547000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,547,14154.8,547000,-2.8693,-2.37,-4.65,286.93


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 548000
  custom_metrics: {}
  date: 2021-11-05_16-28-22
  done: false
  episode_len_mean: 288.54
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -2.8853999999999824
  episode_reward_min: -4.649999999999945
  episodes_this_iter: 3
  episodes_total: 2061
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0497506857089838e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.0994234250651465
          entropy_coeff: 0.009999999999999998
          kl: 0.019775531565192454
          policy_loss: 0.05049102447099156
          total_loss: 0.050965374583999315
          vf_explained_var: 0.10957642644643784
          vf_loss: 0.011468585544369287
    num_agent_steps_sampled: 548000
    num_agent_steps_trained: 548000
    num_steps_sampled: 548000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,548,14175.7,548000,-2.8854,-2.37,-4.65,288.54


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 549000
  custom_metrics: {}
  date: 2021-11-05_16-28-42
  done: false
  episode_len_mean: 289.87
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -2.8986999999999825
  episode_reward_min: -4.649999999999945
  episodes_this_iter: 3
  episodes_total: 2064
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0497506857089838e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.0879760007063548
          entropy_coeff: 0.009999999999999998
          kl: 0.07585917205797854
          policy_loss: -0.05606659154097239
          total_loss: -0.05373667213651869
          vf_explained_var: -0.00841006264090538
          vf_loss: 0.013209679769352079
    num_agent_steps_sampled: 549000
    num_agent_steps_trained: 549000
    num_steps_sampled: 549000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,549,14196.1,549000,-2.8987,-2.37,-4.65,289.87


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 550000
  custom_metrics: {}
  date: 2021-11-05_16-29-03
  done: false
  episode_len_mean: 291.57
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -2.9156999999999815
  episode_reward_min: -4.649999999999945
  episodes_this_iter: 4
  episodes_total: 2068
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5746260285634757e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.1501417729589674
          entropy_coeff: 0.009999999999999998
          kl: 0.018576324833180162
          policy_loss: -0.008409423960579766
          total_loss: -0.002741563734081056
          vf_explained_var: 0.009256038814783096
          vf_loss: 0.01716927518654201
    num_agent_steps_sampled: 550000
    num_agent_steps_trained: 550000
    num_steps_sampled: 550000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,550,14217.5,550000,-2.9157,-2.37,-4.65,291.57




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 551000
  custom_metrics: {}
  date: 2021-11-05_16-29-44
  done: false
  episode_len_mean: 290.71
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -2.9070999999999816
  episode_reward_min: -4.649999999999945
  episodes_this_iter: 3
  episodes_total: 2071
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5746260285634757e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.2026063495212131
          entropy_coeff: 0.009999999999999998
          kl: 0.015119852474227318
          policy_loss: -0.02215282850795322
          total_loss: -0.022682579772339926
          vf_explained_var: -0.0034059027675539255
          vf_loss: 0.011496313237067726
    num_agent_steps_sampled: 551000
    num_agent_steps_trained: 551000
    num_steps_sampled: 551000
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,551,14257.7,551000,-2.9071,-2.37,-4.65,290.71


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 552000
  custom_metrics: {}
  date: 2021-11-05_16-30-06
  done: false
  episode_len_mean: 285.24
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.852399999999982
  episode_reward_min: -4.649999999999945
  episodes_this_iter: 4
  episodes_total: 2075
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5746260285634757e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.9716028604242537
          entropy_coeff: 0.009999999999999998
          kl: 0.006893564601927772
          policy_loss: 0.021993161323997708
          total_loss: 0.027326442963547175
          vf_explained_var: 0.17046688497066498
          vf_loss: 0.015049313474446536
    num_agent_steps_sampled: 552000
    num_agent_steps_trained: 552000
    num_steps_sampled: 552000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,552,14280.1,552000,-2.8524,-2.22,-4.65,285.24


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 553000
  custom_metrics: {}
  date: 2021-11-05_16-30-29
  done: false
  episode_len_mean: 280.13
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.8012999999999835
  episode_reward_min: -3.659999999999966
  episodes_this_iter: 4
  episodes_total: 2079
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5746260285634757e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.9204810539881388
          entropy_coeff: 0.009999999999999998
          kl: 0.01735694224288914
          policy_loss: 0.017833904259734683
          total_loss: 0.02542693399720722
          vf_explained_var: -0.03127945959568024
          vf_loss: 0.01679784068837762
    num_agent_steps_sampled: 553000
    num_agent_steps_trained: 553000
    num_steps_sampled: 553000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,553,14303,553000,-2.8013,-2.22,-3.66,280.13


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 554000
  custom_metrics: {}
  date: 2021-11-05_16-30-51
  done: false
  episode_len_mean: 279.41
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.794099999999984
  episode_reward_min: -3.379999999999972
  episodes_this_iter: 3
  episodes_total: 2082
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5746260285634757e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.0508120954036713
          entropy_coeff: 0.009999999999999998
          kl: 0.01951387182557222
          policy_loss: 0.039370574139886434
          total_loss: 0.04002541626493136
          vf_explained_var: 0.033443842083215714
          vf_loss: 0.011162963455232482
    num_agent_steps_sampled: 554000
    num_agent_steps_trained: 554000
    num_steps_sampled: 554000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,554,14324.8,554000,-2.7941,-2.22,-3.38,279.41


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 555000
  custom_metrics: {}
  date: 2021-11-05_16-31-11
  done: false
  episode_len_mean: 279.71
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.7970999999999844
  episode_reward_min: -3.3899999999999717
  episodes_this_iter: 3
  episodes_total: 2085
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5746260285634757e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.163091429736879
          entropy_coeff: 0.009999999999999998
          kl: 0.04944513316898915
          policy_loss: -0.015465930435392591
          total_loss: -0.01495439178413815
          vf_explained_var: -0.024919552728533745
          vf_loss: 0.012142450576518766
    num_agent_steps_sampled: 555000
    num_agent_steps_trained: 555000
    num_steps_sampled: 555000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,555,14345,555000,-2.7971,-2.22,-3.39,279.71


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 556000
  custom_metrics: {}
  date: 2021-11-05_16-31-34
  done: false
  episode_len_mean: 279.53
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.7952999999999832
  episode_reward_min: -3.3899999999999717
  episodes_this_iter: 4
  episodes_total: 2089
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.3619390428452134e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.071506573756536
          entropy_coeff: 0.009999999999999998
          kl: 0.020985892710067136
          policy_loss: 0.013702519237995148
          total_loss: 0.02016175736983617
          vf_explained_var: 0.08130031824111938
          vf_loss: 0.017174305239071447
    num_agent_steps_sampled: 556000
    num_agent_steps_trained: 556000
    num_steps_sampled: 556000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,556,14367.6,556000,-2.7953,-2.22,-3.39,279.53


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 557000
  custom_metrics: {}
  date: 2021-11-05_16-31-56
  done: false
  episode_len_mean: 280.22
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.8021999999999845
  episode_reward_min: -3.3899999999999717
  episodes_this_iter: 3
  episodes_total: 2092
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5429085642678205e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.106412054432763
          entropy_coeff: 0.009999999999999998
          kl: 0.013230668125623563
          policy_loss: -0.0627450790670183
          total_loss: -0.06166489885913001
          vf_explained_var: -0.2905539274215698
          vf_loss: 0.012144298075387876
    num_agent_steps_sampled: 557000
    num_agent_steps_trained: 557000
    num_steps_sampled: 557000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,557,14389.6,557000,-2.8022,-2.22,-3.39,280.22


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 558000
  custom_metrics: {}
  date: 2021-11-05_16-32-18
  done: false
  episode_len_mean: 280.48
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.804799999999984
  episode_reward_min: -3.3899999999999717
  episodes_this_iter: 4
  episodes_total: 2096
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5429085642678205e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.0501477499802907
          entropy_coeff: 0.009999999999999998
          kl: 0.013841736978893791
          policy_loss: 0.017013847827911377
          total_loss: 0.0215798100663556
          vf_explained_var: 0.19479522109031677
          vf_loss: 0.01506743911239836
    num_agent_steps_sampled: 558000
    num_agent_steps_trained: 558000
    num_steps_sampled: 558000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,558,14412.1,558000,-2.8048,-2.22,-3.39,280.48




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 559000
  custom_metrics: {}
  date: 2021-11-05_16-32-58
  done: false
  episode_len_mean: 279.98
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.7997999999999843
  episode_reward_min: -3.3899999999999717
  episodes_this_iter: 4
  episodes_total: 2100
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5429085642678205e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.0253009776274362
          entropy_coeff: 0.009999999999999998
          kl: 0.013570306417167988
          policy_loss: 0.00413801951540841
          total_loss: 0.009679740998480055
          vf_explained_var: 0.2053712159395218
          vf_loss: 0.015794731428225836
    num_agent_steps_sampled: 559000
    num_agent_steps_trained: 559000
    num_steps_sampled: 559000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,559,14451.7,559000,-2.7998,-2.22,-3.39,279.98


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 560000
  custom_metrics: {}
  date: 2021-11-05_16-33-24
  done: false
  episode_len_mean: 279.76
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.797599999999983
  episode_reward_min: -3.3899999999999717
  episodes_this_iter: 3
  episodes_total: 2103
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5429085642678205e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.904321371184455
          entropy_coeff: 0.009999999999999998
          kl: 0.01796929652646817
          policy_loss: -0.05024190793434779
          total_loss: -0.053188900856508146
          vf_explained_var: 0.6267938613891602
          vf_loss: 0.006096219245551361
    num_agent_steps_sampled: 560000
    num_agent_steps_trained: 560000
    num_steps_sampled: 560000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,560,14477.8,560000,-2.7976,-2.22,-3.39,279.76


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 561000
  custom_metrics: {}
  date: 2021-11-05_16-33-49
  done: false
  episode_len_mean: 278.25
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.782499999999984
  episode_reward_min: -3.3899999999999717
  episodes_this_iter: 4
  episodes_total: 2107
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5429085642678205e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6634800460603502
          entropy_coeff: 0.009999999999999998
          kl: 0.022612582972849616
          policy_loss: -0.10373499716321627
          total_loss: -0.09368278905749321
          vf_explained_var: 0.21503429114818573
          vf_loss: 0.016687009183482992
    num_agent_steps_sampled: 561000
    num_agent_steps_trained: 561000
    num_steps_sampled: 561000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,561,14502.6,561000,-2.7825,-2.22,-3.39,278.25


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 562000
  custom_metrics: {}
  date: 2021-11-05_16-34-14
  done: false
  episode_len_mean: 277.14
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.7713999999999857
  episode_reward_min: -3.3899999999999717
  episodes_this_iter: 4
  episodes_total: 2111
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.31436284640173e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.7608406656318241
          entropy_coeff: 0.009999999999999998
          kl: 0.008664362893529187
          policy_loss: -0.10481277195115885
          total_loss: -0.1032006002134747
          vf_explained_var: 0.7370356321334839
          vf_loss: 0.009220578671536512
    num_agent_steps_sampled: 562000
    num_agent_steps_trained: 562000
    num_steps_sampled: 562000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,562,14527.3,562000,-2.7714,-2.22,-3.39,277.14


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 563000
  custom_metrics: {}
  date: 2021-11-05_16-34-38
  done: false
  episode_len_mean: 276.23
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.7622999999999855
  episode_reward_min: -3.3899999999999717
  episodes_this_iter: 4
  episodes_total: 2115
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.31436284640173e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.8852424661318461
          entropy_coeff: 0.009999999999999998
          kl: 0.06508690787345933
          policy_loss: -0.04680444374680519
          total_loss: -0.04555946720971001
          vf_explained_var: 0.5830690860748291
          vf_loss: 0.010097396870454153
    num_agent_steps_sampled: 563000
    num_agent_steps_trained: 563000
    num_steps_sampled: 563000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,563,14552,563000,-2.7623,-2.22,-3.39,276.23


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 564000
  custom_metrics: {}
  date: 2021-11-05_16-34-56
  done: false
  episode_len_mean: 277.71
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.777099999999985
  episode_reward_min: -4.189999999999955
  episodes_this_iter: 3
  episodes_total: 2118
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.971544269602596e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.1155852337678274
          entropy_coeff: 0.009999999999999998
          kl: 0.038080232431671295
          policy_loss: -0.09407970731457074
          total_loss: -0.09377675015065405
          vf_explained_var: 0.17606613039970398
          vf_loss: 0.011458806719423996
    num_agent_steps_sampled: 564000
    num_agent_steps_trained: 564000
    num_steps_sampled: 564000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,564,14569.3,564000,-2.7771,-2.22,-4.19,277.71


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 565000
  custom_metrics: {}
  date: 2021-11-05_16-35-13
  done: false
  episode_len_mean: 282.43
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.8242999999999836
  episode_reward_min: -4.90999999999994
  episodes_this_iter: 3
  episodes_total: 2121
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1957316404403888e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.277859874566396
          entropy_coeff: 0.009999999999999998
          kl: 0.012918206809194841
          policy_loss: 0.06762697845697403
          total_loss: 0.05986331717835532
          vf_explained_var: 0.3147978186607361
          vf_loss: 0.005014935955922637
    num_agent_steps_sampled: 565000
    num_agent_steps_trained: 565000
    num_steps_sampled: 565000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,565,14586.2,565000,-2.8243,-2.22,-4.91,282.43


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 566000
  custom_metrics: {}
  date: 2021-11-05_16-35-32
  done: false
  episode_len_mean: 284.53
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.845299999999983
  episode_reward_min: -4.90999999999994
  episodes_this_iter: 2
  episodes_total: 2123
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1957316404403888e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2688115583525763
          entropy_coeff: 0.009999999999999998
          kl: 0.026305461508048744
          policy_loss: -0.07809722075859706
          total_loss: -0.08019528480039703
          vf_explained_var: 0.17647257447242737
          vf_loss: 0.0105900501832366
    num_agent_steps_sampled: 566000
    num_agent_steps_trained: 566000
    num_steps_sampled: 566000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,566,14605.5,566000,-2.8453,-2.22,-4.91,284.53


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 567000
  custom_metrics: {}
  date: 2021-11-05_16-35-48
  done: false
  episode_len_mean: 288.32
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.8831999999999818
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 3
  episodes_total: 2126
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7935974606605842e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.4641696201430427
          entropy_coeff: 0.009999999999999998
          kl: 0.05620319476016375
          policy_loss: 0.08163225932253731
          total_loss: 0.0730304835571183
          vf_explained_var: -0.05490909516811371
          vf_loss: 0.006039911648743631
    num_agent_steps_sampled: 567000
    num_agent_steps_trained: 567000
    num_steps_sampled: 567000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,567,14621.1,567000,-2.8832,-2.22,-5.22,288.32


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 568000
  custom_metrics: {}
  date: 2021-11-05_16-36-07
  done: false
  episode_len_mean: 290.91
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.9090999999999823
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 3
  episodes_total: 2129
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6903961909908763e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.3522295633951822
          entropy_coeff: 0.009999999999999998
          kl: 0.016163600092363596
          policy_loss: 0.04034267233477699
          total_loss: 0.03894398460785548
          vf_explained_var: -0.2552538216114044
          vf_loss: 0.012123605704982766
    num_agent_steps_sampled: 568000
    num_agent_steps_trained: 568000
    num_steps_sampled: 568000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,568,14640.1,568000,-2.9091,-2.22,-5.22,290.91




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 569000
  custom_metrics: {}
  date: 2021-11-05_16-36-43
  done: false
  episode_len_mean: 292.34
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.9233999999999822
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 2
  episodes_total: 2131
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6903961909908763e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2699162827597723
          entropy_coeff: 0.009999999999999998
          kl: 0.022644316256892526
          policy_loss: -0.10772678057352701
          total_loss: -0.11080807414319781
          vf_explained_var: 0.10638532787561417
          vf_loss: 0.009617860203919311
    num_agent_steps_sampled: 569000
    num_agent_steps_trained: 569000
    num_steps_sampled: 569000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,569,14676.9,569000,-2.9234,-2.22,-5.22,292.34


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 570000
  custom_metrics: {}
  date: 2021-11-05_16-37-02
  done: false
  episode_len_mean: 295.51
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.955099999999981
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 3
  episodes_total: 2134
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.0355942864863124e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.3730473452144198
          entropy_coeff: 0.009999999999999998
          kl: 0.010225416739812785
          policy_loss: 0.004378806551297506
          total_loss: -4.1790720489290026e-05
          vf_explained_var: 0.3968257009983063
          vf_loss: 0.00930987560423091
    num_agent_steps_sampled: 570000
    num_agent_steps_trained: 570000
    num_steps_sampled: 570000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,570,14695.6,570000,-2.9551,-2.22,-5.22,295.51


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 571000
  custom_metrics: {}
  date: 2021-11-05_16-37-19
  done: false
  episode_len_mean: 299.16
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.9915999999999805
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 3
  episodes_total: 2137
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.0355942864863124e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.212256246805191
          entropy_coeff: 0.009999999999999998
          kl: 0.012889477001952462
          policy_loss: 0.040274883641137016
          total_loss: 0.038602528803878364
          vf_explained_var: 0.1183747798204422
          vf_loss: 0.01045020284121468
    num_agent_steps_sampled: 571000
    num_agent_steps_trained: 571000
    num_steps_sampled: 571000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,571,14712.7,571000,-2.9916,-2.22,-5.22,299.16


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 572000
  custom_metrics: {}
  date: 2021-11-05_16-37-41
  done: false
  episode_len_mean: 300.28
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.0027999999999806
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 2
  episodes_total: 2139
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.0355942864863124e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.0667304045624204
          entropy_coeff: 0.009999999999999998
          kl: 0.014607963252956472
          policy_loss: -0.0848732031053967
          total_loss: -0.08609100613329146
          vf_explained_var: -0.16352826356887817
          vf_loss: 0.009449496979100837
    num_agent_steps_sampled: 572000
    num_agent_steps_trained: 572000
    num_steps_sampled: 572000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,572,14734.6,572000,-3.0028,-2.22,-5.22,300.28


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 573000
  custom_metrics: {}
  date: 2021-11-05_16-38-02
  done: false
  episode_len_mean: 303.26
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.032599999999978
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 4
  episodes_total: 2143
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.0355942864863124e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1439670185248056
          entropy_coeff: 0.009999999999999998
          kl: 0.09487730435690926
          policy_loss: 0.01189284986919827
          total_loss: 0.009236078709363937
          vf_explained_var: 0.5286214351654053
          vf_loss: 0.00878286219926344
    num_agent_steps_sampled: 573000
    num_agent_steps_trained: 573000
    num_steps_sampled: 573000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,573,14755.2,573000,-3.0326,-2.22,-5.22,303.26


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 574000
  custom_metrics: {}
  date: 2021-11-05_16-38-21
  done: false
  episode_len_mean: 304.47
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.0446999999999793
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 3
  episodes_total: 2146
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.05339142972947e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1266258656978607
          entropy_coeff: 0.009999999999999998
          kl: 0.03020100223952061
          policy_loss: 0.020907373022702007
          total_loss: 0.0198693154586686
          vf_explained_var: 0.34377095103263855
          vf_loss: 0.010228181723505259
    num_agent_steps_sampled: 574000
    num_agent_steps_trained: 574000
    num_steps_sampled: 574000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,574,14774.7,574000,-3.0447,-2.22,-5.22,304.47


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 575000
  custom_metrics: {}
  date: 2021-11-05_16-38-39
  done: false
  episode_len_mean: 306.16
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.061599999999978
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 2
  episodes_total: 2148
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.080087144594204e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2200216703944735
          entropy_coeff: 0.009999999999999998
          kl: 0.011407747969998914
          policy_loss: -0.13398572951555252
          total_loss: -0.13899007621738646
          vf_explained_var: 0.5463922023773193
          vf_loss: 0.00719586740694164
    num_agent_steps_sampled: 575000
    num_agent_steps_trained: 575000
    num_steps_sampled: 575000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,575,14792.7,575000,-3.0616,-2.22,-5.22,306.16


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 576000
  custom_metrics: {}
  date: 2021-11-05_16-38-57
  done: false
  episode_len_mean: 308.83
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.088299999999978
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 3
  episodes_total: 2151
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.080087144594204e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2067084140247768
          entropy_coeff: 0.009999999999999998
          kl: 0.013696444779750294
          policy_loss: 0.0500235872136222
          total_loss: 0.04495940854152044
          vf_explained_var: -0.07920803874731064
          vf_loss: 0.00700289662232131
    num_agent_steps_sampled: 576000
    num_agent_steps_trained: 576000
    num_steps_sampled: 576000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,576,14810.5,576000,-3.0883,-2.22,-5.22,308.83


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 577000
  custom_metrics: {}
  date: 2021-11-05_16-39-15
  done: false
  episode_len_mean: 310.79
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.1078999999999777
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 2
  episodes_total: 2153
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.080087144594204e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1522824856970044
          entropy_coeff: 0.009999999999999998
          kl: 0.022685409389263514
          policy_loss: -0.06928437939948506
          total_loss: -0.0744089717666308
          vf_explained_var: 0.4071039855480194
          vf_loss: 0.006398215006144407
    num_agent_steps_sampled: 577000
    num_agent_steps_trained: 577000
    num_steps_sampled: 577000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,577,14828.2,577000,-3.1079,-2.22,-5.22,310.79


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 578000
  custom_metrics: {}
  date: 2021-11-05_16-39-33
  done: false
  episode_len_mean: 314.0
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.139999999999976
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 3
  episodes_total: 2156
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3620130716891305e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.0696800258424548
          entropy_coeff: 0.009999999999999998
          kl: 0.013217023767775496
          policy_loss: -0.07862237551146084
          total_loss: -0.07640809756186273
          vf_explained_var: 0.14082859456539154
          vf_loss: 0.012911060959514645
    num_agent_steps_sampled: 578000
    num_agent_steps_trained: 578000
    num_steps_sampled: 578000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,578,14846.2,578000,-3.14,-2.22,-5.22,314


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 579000
  custom_metrics: {}
  date: 2021-11-05_16-39-52
  done: false
  episode_len_mean: 316.11
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.161099999999976
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 3
  episodes_total: 2159
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3620130716891305e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.1068007760577732
          entropy_coeff: 0.009999999999999998
          kl: 0.015977291355592154
          policy_loss: -0.005918318695492215
          total_loss: -0.007979216509395176
          vf_explained_var: 0.37181535363197327
          vf_loss: 0.009007096206834023
    num_agent_steps_sampled: 579000
    num_agent_steps_trained: 579000
    num_steps_sampled: 579000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,579,14865.4,579000,-3.1611,-2.22,-5.22,316.11




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 580000
  custom_metrics: {}
  date: 2021-11-05_16-40-29
  done: false
  episode_len_mean: 317.46
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.174599999999976
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 3
  episodes_total: 2162
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3620130716891305e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.1749316453933716
          entropy_coeff: 0.009999999999999998
          kl: 0.014315956007859572
          policy_loss: 0.043841764993137786
          total_loss: 0.0417858910229471
          vf_explained_var: 0.39851632714271545
          vf_loss: 0.009693429936628997
    num_agent_steps_sampled: 580000
    num_agent_steps_trained: 580000
    num_steps_sampled: 580000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,580,14902.2,580000,-3.1746,-2.22,-5.22,317.46


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 581000
  custom_metrics: {}
  date: 2021-11-05_16-40-48
  done: false
  episode_len_mean: 317.93
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.179299999999976
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 3
  episodes_total: 2165
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3620130716891305e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.226351535320282
          entropy_coeff: 0.009999999999999998
          kl: 0.01565425501211103
          policy_loss: 0.07489519524905416
          total_loss: 0.07209448582596249
          vf_explained_var: 0.3502362072467804
          vf_loss: 0.009462786907614726
    num_agent_steps_sampled: 581000
    num_agent_steps_trained: 581000
    num_steps_sampled: 581000
    num_steps_trained: 58

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,581,14921.3,581000,-3.1793,-2.22,-5.22,317.93


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 582000
  custom_metrics: {}
  date: 2021-11-05_16-41-07
  done: false
  episode_len_mean: 319.7
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.196999999999976
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 3
  episodes_total: 2168
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3620130716891305e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.1934896257188585
          entropy_coeff: 0.009999999999999998
          kl: 0.012021315422999813
          policy_loss: 0.05741671770811081
          total_loss: 0.055723205374346836
          vf_explained_var: -0.21003226935863495
          vf_loss: 0.010241370902804192
    num_agent_steps_sampled: 582000
    num_agent_steps_trained: 582000
    num_steps_sampled: 582000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,582,14940.3,582000,-3.197,-2.22,-5.22,319.7


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 583000
  custom_metrics: {}
  date: 2021-11-05_16-41-23
  done: false
  episode_len_mean: 322.9
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -3.228999999999975
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 2
  episodes_total: 2170
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3620130716891305e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.1406489345762465
          entropy_coeff: 0.009999999999999998
          kl: 0.012019515272675266
          policy_loss: 0.04551609092288547
          total_loss: 0.04098097797897127
          vf_explained_var: -0.4684077799320221
          vf_loss: 0.0068713635112443525
    num_agent_steps_sampled: 583000
    num_agent_steps_trained: 583000
    num_steps_sampled: 583000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,583,14956.1,583000,-3.229,-2.22,-5.22,322.9


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 584000
  custom_metrics: {}
  date: 2021-11-05_16-41-40
  done: false
  episode_len_mean: 325.75
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.2574999999999745
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 2
  episodes_total: 2172
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3620130716891305e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.069133957889345
          entropy_coeff: 0.009999999999999998
          kl: 0.013803502587041154
          policy_loss: -0.06552886217832565
          total_loss: -0.06652220065395037
          vf_explained_var: 0.20406559109687805
          vf_loss: 0.009697983607960243
    num_agent_steps_sampled: 584000
    num_agent_steps_trained: 584000
    num_steps_sampled: 584000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,584,14973.2,584000,-3.2575,-2.26,-5.22,325.75


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 585000
  custom_metrics: {}
  date: 2021-11-05_16-41-57
  done: false
  episode_len_mean: 329.48
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.294799999999973
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 3
  episodes_total: 2175
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3620130716891305e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.0295516848564148
          entropy_coeff: 0.009999999999999998
          kl: 0.01172088963140791
          policy_loss: 0.04766634288761351
          total_loss: 0.049492026368776955
          vf_explained_var: -0.2125551402568817
          vf_loss: 0.012121185130995905
    num_agent_steps_sampled: 585000
    num_agent_steps_trained: 585000
    num_steps_sampled: 585000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,585,14990.5,585000,-3.2948,-2.26,-5.22,329.48


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 586000
  custom_metrics: {}
  date: 2021-11-05_16-42-16
  done: false
  episode_len_mean: 332.29
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.3228999999999735
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 3
  episodes_total: 2178
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3620130716891305e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.1431692375077143
          entropy_coeff: 0.009999999999999998
          kl: 0.01027484825065225
          policy_loss: 0.10377649896674686
          total_loss: 0.09918738570478228
          vf_explained_var: 0.12091219425201416
          vf_loss: 0.006842564447692388
    num_agent_steps_sampled: 586000
    num_agent_steps_trained: 586000
    num_steps_sampled: 586000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,586,15009,586000,-3.3229,-2.26,-5.22,332.29


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 587000
  custom_metrics: {}
  date: 2021-11-05_16-42-34
  done: false
  episode_len_mean: 334.07
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.3406999999999734
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 2
  episodes_total: 2180
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3620130716891305e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.0770719091097514
          entropy_coeff: 0.009999999999999998
          kl: 0.015020841882846032
          policy_loss: -0.07433586849106683
          total_loss: -0.07216027991639243
          vf_explained_var: 0.036093901842832565
          vf_loss: 0.012946288595493469
    num_agent_steps_sampled: 587000
    num_agent_steps_trained: 587000
    num_steps_sampled: 587000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,587,15027,587000,-3.3407,-2.26,-5.22,334.07


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 588000
  custom_metrics: {}
  date: 2021-11-05_16-42-53
  done: false
  episode_len_mean: 335.99
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.359899999999973
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 3
  episodes_total: 2183
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3620130716891305e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.0805855287445916
          entropy_coeff: 0.009999999999999998
          kl: 0.014922686701205222
          policy_loss: -0.09310179634226692
          total_loss: -0.08625931955046123
          vf_explained_var: 0.06230514496564865
          vf_loss: 0.017648317851126194
    num_agent_steps_sampled: 588000
    num_agent_steps_trained: 588000
    num_steps_sampled: 588000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,588,15046.1,588000,-3.3599,-2.26,-5.22,335.99


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 589000
  custom_metrics: {}
  date: 2021-11-05_16-43-13
  done: false
  episode_len_mean: 336.61
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.366099999999972
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 3
  episodes_total: 2186
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3620130716891305e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.0944803284274207
          entropy_coeff: 0.009999999999999998
          kl: 0.019052502279561196
          policy_loss: -0.061717467175589666
          total_loss: -0.0577957674033112
          vf_explained_var: 0.09310299903154373
          vf_loss: 0.014866476982004113
    num_agent_steps_sampled: 589000
    num_agent_steps_trained: 589000
    num_steps_sampled: 589000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,589,15066.2,589000,-3.3661,-2.26,-5.22,336.61


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 590000
  custom_metrics: {}
  date: 2021-11-05_16-43-34
  done: false
  episode_len_mean: 338.29
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.382899999999972
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 3
  episodes_total: 2189
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3620130716891305e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.9758714894453685
          entropy_coeff: 0.009999999999999998
          kl: 0.01105472910408941
          policy_loss: -0.0959465601378017
          total_loss: -0.08972376477387216
          vf_explained_var: 0.12033674120903015
          vf_loss: 0.015981501961747805
    num_agent_steps_sampled: 590000
    num_agent_steps_trained: 590000
    num_steps_sampled: 590000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,590,15086.9,590000,-3.3829,-2.26,-5.22,338.29




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 591000
  custom_metrics: {}
  date: 2021-11-05_16-44-14
  done: false
  episode_len_mean: 338.28
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.382799999999972
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 4
  episodes_total: 2193
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3620130716891305e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.0535294830799102
          entropy_coeff: 0.009999999999999998
          kl: 0.00620620177260365
          policy_loss: 0.03699322177304162
          total_loss: 0.04031039070751932
          vf_explained_var: 0.20954714715480804
          vf_loss: 0.013852456719097165
    num_agent_steps_sampled: 591000
    num_agent_steps_trained: 591000
    num_steps_sampled: 591000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,591,15126.9,591000,-3.3828,-2.26,-5.22,338.28


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 592000
  custom_metrics: {}
  date: 2021-11-05_16-44-36
  done: false
  episode_len_mean: 338.67
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.386699999999972
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 3
  episodes_total: 2196
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3620130716891305e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.0887392699718474
          entropy_coeff: 0.009999999999999998
          kl: 0.025656892491573855
          policy_loss: 0.016192127764225007
          total_loss: 0.015963358928759892
          vf_explained_var: 0.16953369975090027
          vf_loss: 0.010658590895279001
    num_agent_steps_sampled: 592000
    num_agent_steps_trained: 592000
    num_steps_sampled: 592000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,592,15148.4,592000,-3.3867,-2.26,-5.22,338.67


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 593000
  custom_metrics: {}
  date: 2021-11-05_16-44-58
  done: false
  episode_len_mean: 339.53
  episode_media: {}
  episode_reward_max: -2.3099999999999947
  episode_reward_mean: -3.3952999999999713
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 4
  episodes_total: 2200
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0430196075336962e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8957740763823191
          entropy_coeff: 0.009999999999999998
          kl: 0.004144690295875996
          policy_loss: 0.007757195168071323
          total_loss: 0.012088846663633983
          vf_explained_var: 0.2502301037311554
          vf_loss: 0.013289389510949453
    num_agent_steps_sampled: 593000
    num_agent_steps_trained: 593000
    num_steps_sampled: 593000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,593,15170.9,593000,-3.3953,-2.31,-5.22,339.53


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 594000
  custom_metrics: {}
  date: 2021-11-05_16-45-20
  done: false
  episode_len_mean: 340.33
  episode_media: {}
  episode_reward_max: -2.3099999999999947
  episode_reward_mean: -3.403299999999971
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 3
  episodes_total: 2203
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0215098037668481e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.048903312948015
          entropy_coeff: 0.009999999999999998
          kl: 0.01260523126109029
          policy_loss: 0.02533346778816647
          total_loss: 0.02367549215753873
          vf_explained_var: 0.2441367506980896
          vf_loss: 0.008831049481523223
    num_agent_steps_sampled: 594000
    num_agent_steps_trained: 594000
    num_steps_sampled: 594000
    num_steps_trained: 59

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,594,15192.9,594000,-3.4033,-2.31,-5.22,340.33


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 595000
  custom_metrics: {}
  date: 2021-11-05_16-45-42
  done: false
  episode_len_mean: 342.27
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -3.4226999999999714
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 4
  episodes_total: 2207
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0215098037668481e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.9626699970828162
          entropy_coeff: 0.009999999999999998
          kl: 0.007650665970513989
          policy_loss: -0.005344120330280728
          total_loss: -0.002562412205669615
          vf_explained_var: 0.29205238819122314
          vf_loss: 0.012408404010865423
    num_agent_steps_sampled: 595000
    num_agent_steps_trained: 595000
    num_steps_sampled: 595000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,595,15214.8,595000,-3.4227,-2.44,-5.22,342.27


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 596000
  custom_metrics: {}
  date: 2021-11-05_16-46-03
  done: false
  episode_len_mean: 343.82
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -3.4381999999999704
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 3
  episodes_total: 2210
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0215098037668481e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.2077380816141765
          entropy_coeff: 0.009999999999999998
          kl: 0.02405296150553637
          policy_loss: 0.026543293231063417
          total_loss: 0.027640734612941743
          vf_explained_var: -0.2205761820077896
          vf_loss: 0.013174798732507043
    num_agent_steps_sampled: 596000
    num_agent_steps_trained: 596000
    num_steps_sampled: 596000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,596,15235.7,596000,-3.4382,-2.44,-5.22,343.82


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 597000
  custom_metrics: {}
  date: 2021-11-05_16-46-24
  done: false
  episode_len_mean: 345.51
  episode_media: {}
  episode_reward_max: -2.479999999999991
  episode_reward_mean: -3.4550999999999705
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 3
  episodes_total: 2213
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.532264705650272e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.103191081682841
          entropy_coeff: 0.009999999999999998
          kl: 0.04133205655664859
          policy_loss: -0.01213905442919996
          total_loss: -0.014865997433662414
          vf_explained_var: -0.25031474232673645
          vf_loss: 0.008304912228615851
    num_agent_steps_sampled: 597000
    num_agent_steps_trained: 597000
    num_steps_sampled: 597000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,597,15257.1,597000,-3.4551,-2.48,-5.22,345.51


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 598000
  custom_metrics: {}
  date: 2021-11-05_16-46-47
  done: false
  episode_len_mean: 345.13
  episode_media: {}
  episode_reward_max: -2.479999999999991
  episode_reward_mean: -3.4512999999999696
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 4
  episodes_total: 2217
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2983970584754074e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.99816478755739
          entropy_coeff: 0.009999999999999998
          kl: 0.009067573537007038
          policy_loss: 0.012162454840209749
          total_loss: 0.01930930759343836
          vf_explained_var: 0.06524345278739929
          vf_loss: 0.01712848258515199
    num_agent_steps_sampled: 598000
    num_agent_steps_trained: 598000
    num_steps_sampled: 598000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,598,15279.3,598000,-3.4513,-2.48,-5.22,345.13




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 599000
  custom_metrics: {}
  date: 2021-11-05_16-47-26
  done: false
  episode_len_mean: 341.69
  episode_media: {}
  episode_reward_max: -2.479999999999991
  episode_reward_mean: -3.416899999999971
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 3
  episodes_total: 2220
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2983970584754074e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.1115892509619394
          entropy_coeff: 0.009999999999999998
          kl: 0.024062167557842427
          policy_loss: -0.054463163514931995
          total_loss: -0.04994714806477229
          vf_explained_var: 0.07878147810697556
          vf_loss: 0.015631851895401876
    num_agent_steps_sampled: 599000
    num_agent_steps_trained: 599000
    num_steps_sampled: 599000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,599,15318.2,599000,-3.4169,-2.48,-5.22,341.69


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 600000
  custom_metrics: {}
  date: 2021-11-05_16-47-50
  done: false
  episode_len_mean: 339.07
  episode_media: {}
  episode_reward_max: -2.479999999999991
  episode_reward_mean: -3.3906999999999714
  episode_reward_min: -5.219999999999933
  episodes_this_iter: 4
  episodes_total: 2224
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.447595587713112e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.0634789493348864
          entropy_coeff: 0.009999999999999998
          kl: 0.019818019384216255
          policy_loss: 0.009663579199049208
          total_loss: 0.015395543062024647
          vf_explained_var: 0.16130010783672333
          vf_loss: 0.01636668687893285
    num_agent_steps_sampled: 600000
    num_agent_steps_trained: 600000
    num_steps_sampled: 600000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,600,15342.4,600000,-3.3907,-2.48,-5.22,339.07


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 601000
  custom_metrics: {}
  date: 2021-11-05_16-48-14
  done: false
  episode_len_mean: 333.27
  episode_media: {}
  episode_reward_max: -2.479999999999991
  episode_reward_mean: -3.3326999999999725
  episode_reward_min: -4.479999999999949
  episodes_this_iter: 4
  episodes_total: 2228
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.447595587713112e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8427859571244981
          entropy_coeff: 0.009999999999999998
          kl: 0.008829880704303603
          policy_loss: -0.0017888940042919584
          total_loss: 0.00503389909863472
          vf_explained_var: 0.19558237493038177
          vf_loss: 0.015250627737906244
    num_agent_steps_sampled: 601000
    num_agent_steps_trained: 601000
    num_steps_sampled: 601000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,601,15366.2,601000,-3.3327,-2.48,-4.48,333.27


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 602000
  custom_metrics: {}
  date: 2021-11-05_16-48-39
  done: false
  episode_len_mean: 330.92
  episode_media: {}
  episode_reward_max: -2.479999999999991
  episode_reward_mean: -3.309199999999973
  episode_reward_min: -4.479999999999949
  episodes_this_iter: 3
  episodes_total: 2231
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.447595587713112e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8805147078302171
          entropy_coeff: 0.009999999999999998
          kl: 0.013772160769455609
          policy_loss: -0.09833119387427966
          total_loss: -0.09068872887227271
          vf_explained_var: 0.234648659825325
          vf_loss: 0.016447567256788413
    num_agent_steps_sampled: 602000
    num_agent_steps_trained: 602000
    num_steps_sampled: 602000
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,602,15391.5,602000,-3.3092,-2.48,-4.48,330.92


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 603000
  custom_metrics: {}
  date: 2021-11-05_16-49-01
  done: false
  episode_len_mean: 327.42
  episode_media: {}
  episode_reward_max: -2.479999999999991
  episode_reward_mean: -3.274199999999974
  episode_reward_min: -4.479999999999949
  episodes_this_iter: 4
  episodes_total: 2235
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.447595587713112e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.9980232179164886
          entropy_coeff: 0.009999999999999998
          kl: 0.016154646392065194
          policy_loss: 0.012472700658771726
          total_loss: 0.016585513038767707
          vf_explained_var: 0.2813792824745178
          vf_loss: 0.014092989379747047
    num_agent_steps_sampled: 603000
    num_agent_steps_trained: 603000
    num_steps_sampled: 603000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,603,15414,603000,-3.2742,-2.48,-4.48,327.42


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 604000
  custom_metrics: {}
  date: 2021-11-05_16-49-24
  done: false
  episode_len_mean: 323.03
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -3.2302999999999753
  episode_reward_min: -4.479999999999949
  episodes_this_iter: 4
  episodes_total: 2239
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.447595587713112e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.060237897104687
          entropy_coeff: 0.009999999999999998
          kl: 0.024386537966664148
          policy_loss: 0.008324537177880604
          total_loss: 0.00865055053598351
          vf_explained_var: 0.527031421661377
          vf_loss: 0.010928310624634226
    num_agent_steps_sampled: 604000
    num_agent_steps_trained: 604000
    num_steps_sampled: 604000
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,604,15436.7,604000,-3.2303,-2.5,-4.48,323.03


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 605000
  custom_metrics: {}
  date: 2021-11-05_16-49-43
  done: false
  episode_len_mean: 322.39
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -3.2238999999999756
  episode_reward_min: -4.479999999999949
  episodes_this_iter: 3
  episodes_total: 2242
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.171393381569669e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.0087275485197702
          entropy_coeff: 0.009999999999999998
          kl: 0.019667801993489636
          policy_loss: 0.010186108450094858
          total_loss: 0.009281225833627912
          vf_explained_var: 0.6971339583396912
          vf_loss: 0.009182291106683099
    num_agent_steps_sampled: 605000
    num_agent_steps_trained: 605000
    num_steps_sampled: 605000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,605,15455.9,605000,-3.2239,-2.5,-4.48,322.39


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 606000
  custom_metrics: {}
  date: 2021-11-05_16-50-07
  done: false
  episode_len_mean: 321.5
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -3.2149999999999754
  episode_reward_min: -4.479999999999949
  episodes_this_iter: 3
  episodes_total: 2245
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.171393381569669e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.080857249763277
          entropy_coeff: 0.009999999999999998
          kl: 0.010389395422757275
          policy_loss: -0.13040735754701827
          total_loss: -0.13083854847484164
          vf_explained_var: 0.6204390525817871
          vf_loss: 0.010377332764781184
    num_agent_steps_sampled: 606000
    num_agent_steps_trained: 606000
    num_steps_sampled: 606000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,606,15479.6,606000,-3.215,-2.5,-4.48,321.5


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 607000
  custom_metrics: {}
  date: 2021-11-05_16-50-31
  done: false
  episode_len_mean: 317.03
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -3.1702999999999766
  episode_reward_min: -4.479999999999949
  episodes_this_iter: 4
  episodes_total: 2249
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.171393381569669e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8643631272845798
          entropy_coeff: 0.009999999999999998
          kl: 0.011635257456455111
          policy_loss: -0.07174079459574488
          total_loss: -0.07060859244730737
          vf_explained_var: 0.5560640692710876
          vf_loss: 0.00977577960325612
    num_agent_steps_sampled: 607000
    num_agent_steps_trained: 607000
    num_steps_sampled: 607000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,607,15503.2,607000,-3.1703,-2.5,-4.48,317.03




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 608000
  custom_metrics: {}
  date: 2021-11-05_16-51-12
  done: false
  episode_len_mean: 312.65
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -3.1264999999999774
  episode_reward_min: -4.479999999999949
  episodes_this_iter: 4
  episodes_total: 2253
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.171393381569669e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8786492791440752
          entropy_coeff: 0.009999999999999998
          kl: 0.01727033021794758
          policy_loss: 0.013159818160865042
          total_loss: 0.014887109109097057
          vf_explained_var: 0.5481513738632202
          vf_loss: 0.010513696260750294
    num_agent_steps_sampled: 608000
    num_agent_steps_trained: 608000
    num_steps_sampled: 608000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,608,15544.3,608000,-3.1265,-2.3,-4.48,312.65


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 609000
  custom_metrics: {}
  date: 2021-11-05_16-51-35
  done: false
  episode_len_mean: 307.87
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -3.078699999999979
  episode_reward_min: -4.479999999999949
  episodes_this_iter: 4
  episodes_total: 2257
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.171393381569669e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8617099808322058
          entropy_coeff: 0.009999999999999998
          kl: 0.01084914399136424
          policy_loss: 0.007579288590285513
          total_loss: 0.010385517589747905
          vf_explained_var: 0.4408843517303467
          vf_loss: 0.011423276664896145
    num_agent_steps_sampled: 609000
    num_agent_steps_trained: 609000
    num_steps_sampled: 609000
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,609,15567.4,609000,-3.0787,-2.3,-4.48,307.87


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 610000
  custom_metrics: {}
  date: 2021-11-05_16-51-59
  done: false
  episode_len_mean: 304.79
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -3.047899999999979
  episode_reward_min: -4.479999999999949
  episodes_this_iter: 4
  episodes_total: 2261
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.171393381569669e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.6375507361359066
          entropy_coeff: 0.009999999999999998
          kl: 0.017395379436190222
          policy_loss: 0.0356767802602715
          total_loss: 0.03842833017309507
          vf_explained_var: 0.4071243405342102
          vf_loss: 0.009126972432972657
    num_agent_steps_sampled: 610000
    num_agent_steps_trained: 610000
    num_steps_sampled: 610000
    num_steps_trained: 610

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,610,15591.7,610000,-3.0479,-2.3,-4.48,304.79


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 611000
  custom_metrics: {}
  date: 2021-11-05_16-52-24
  done: false
  episode_len_mean: 302.5
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -3.024999999999979
  episode_reward_min: -4.479999999999949
  episodes_this_iter: 3
  episodes_total: 2264
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.171393381569669e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7136100782288446
          entropy_coeff: 0.009999999999999998
          kl: 0.031190891059041805
          policy_loss: -0.10047503602173594
          total_loss: -0.09389593808187378
          vf_explained_var: 0.2051842212677002
          vf_loss: 0.01371503969033559
    num_agent_steps_sampled: 611000
    num_agent_steps_trained: 611000
    num_steps_sampled: 611000
    num_steps_trained: 61

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,611,15616,611000,-3.025,-2.3,-4.48,302.5


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 612000
  custom_metrics: {}
  date: 2021-11-05_16-52-48
  done: false
  episode_len_mean: 298.25
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.9824999999999795
  episode_reward_min: -4.479999999999949
  episodes_this_iter: 4
  episodes_total: 2268
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.7570900723545e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.5441622323460049
          entropy_coeff: 0.009999999999999998
          kl: 0.005645212694602719
          policy_loss: -0.11751140629251798
          total_loss: -0.10825734419955148
          vf_explained_var: 0.2863612473011017
          vf_loss: 0.014695642671237389
    num_agent_steps_sampled: 612000
    num_agent_steps_trained: 612000
    num_steps_sampled: 612000
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,612,15640.6,612000,-2.9825,-2.3,-4.48,298.25


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 613000
  custom_metrics: {}
  date: 2021-11-05_16-53-13
  done: false
  episode_len_mean: 291.44
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.914399999999981
  episode_reward_min: -4.369999999999951
  episodes_this_iter: 4
  episodes_total: 2272
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.7570900723545e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.6109473019838333
          entropy_coeff: 0.009999999999999998
          kl: 0.015672370771577012
          policy_loss: -0.11368277453713947
          total_loss: -0.10491935031281577
          vf_explained_var: 0.289148211479187
          vf_loss: 0.0148727811459038
    num_agent_steps_sampled: 613000
    num_agent_steps_trained: 613000
    num_steps_sampled: 613000
    num_steps_trained: 61300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,613,15665.3,613000,-2.9144,-2.3,-4.37,291.44


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 614000
  custom_metrics: {}
  date: 2021-11-05_16-53-38
  done: false
  episode_len_mean: 284.31
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.843099999999983
  episode_reward_min: -4.0299999999999585
  episodes_this_iter: 5
  episodes_total: 2277
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.7570900723545e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.4542768551243676
          entropy_coeff: 0.009999999999999998
          kl: 0.03181882448197071
          policy_loss: -0.01572640910744667
          total_loss: -0.005043001224597295
          vf_explained_var: 0.30720847845077515
          vf_loss: 0.015225934537334574
    num_agent_steps_sampled: 614000
    num_agent_steps_trained: 614000
    num_steps_sampled: 614000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,614,15690.4,614000,-2.8431,-2.3,-4.03,284.31




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 615000
  custom_metrics: {}
  date: 2021-11-05_16-54-20
  done: false
  episode_len_mean: 278.09
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.7808999999999844
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 4
  episodes_total: 2281
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1635635108531756e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.5679336584276623
          entropy_coeff: 0.009999999999999998
          kl: 0.0428652767945326
          policy_loss: 0.03183789621624682
          total_loss: 0.038249742405282124
          vf_explained_var: 0.26278653740882874
          vf_loss: 0.012090685135788388
    num_agent_steps_sampled: 615000
    num_agent_steps_trained: 615000
    num_steps_sampled: 615000
    num_steps_trained: 615000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,615,15732.2,615000,-2.7809,-2.07,-3.73,278.09


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 616000
  custom_metrics: {}
  date: 2021-11-05_16-54-46
  done: false
  episode_len_mean: 274.41
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.744099999999985
  episode_reward_min: -3.709999999999965
  episodes_this_iter: 4
  episodes_total: 2285
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7453452662797633e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.8253009272946252
          entropy_coeff: 0.009999999999999998
          kl: 0.03414433948371083
          policy_loss: 0.009819191321730614
          total_loss: 0.01383907368613614
          vf_explained_var: 0.25075286626815796
          vf_loss: 0.012272294745263127
    num_agent_steps_sampled: 616000
    num_agent_steps_trained: 616000
    num_steps_sampled: 616000
    num_steps_trained: 616000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,616,15758,616000,-2.7441,-2.07,-3.71,274.41


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 617000
  custom_metrics: {}
  date: 2021-11-05_16-55-11
  done: false
  episode_len_mean: 270.96
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.7095999999999862
  episode_reward_min: -3.709999999999965
  episodes_this_iter: 4
  episodes_total: 2289
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.618017899419645e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.5414101117187076
          entropy_coeff: 0.009999999999999998
          kl: 0.08261077313258497
          policy_loss: 0.04155870924393336
          total_loss: 0.04791463125083181
          vf_explained_var: 0.19459304213523865
          vf_loss: 0.011767861692027913
    num_agent_steps_sampled: 617000
    num_agent_steps_trained: 617000
    num_steps_sampled: 617000
    num_steps_trained: 617000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,617,15783.1,617000,-2.7096,-2.07,-3.71,270.96


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 618000
  custom_metrics: {}
  date: 2021-11-05_16-55-35
  done: false
  episode_len_mean: 270.33
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.703299999999986
  episode_reward_min: -3.709999999999965
  episodes_this_iter: 4
  episodes_total: 2293
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.9270268491294676e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.6135247988833321
          entropy_coeff: 0.009999999999999998
          kl: 0.043322308169578445
          policy_loss: -0.016866792490084968
          total_loss: -0.011676766061120563
          vf_explained_var: 0.33177870512008667
          vf_loss: 0.01132357550991906
    num_agent_steps_sampled: 618000
    num_agent_steps_trained: 618000
    num_steps_sampled: 618000
    num_steps_trained: 618000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,618,15806.9,618000,-2.7033,-2.07,-3.71,270.33


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 619000
  custom_metrics: {}
  date: 2021-11-05_16-56-02
  done: false
  episode_len_mean: 268.11
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.6810999999999865
  episode_reward_min: -3.709999999999965
  episodes_this_iter: 4
  episodes_total: 2297
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.8905402736941994e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.5687271896335814
          entropy_coeff: 0.009999999999999998
          kl: 0.07340734918797152
          policy_loss: 0.03432568021946483
          total_loss: 0.03861911959118313
          vf_explained_var: 0.2822924852371216
          vf_loss: 0.009976388219123085
    num_agent_steps_sampled: 619000
    num_agent_steps_trained: 619000
    num_steps_sampled: 619000
    num_steps_trained: 619000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,619,15833.6,619000,-2.6811,-2.07,-3.71,268.11


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 620000
  custom_metrics: {}
  date: 2021-11-05_16-56-24
  done: false
  episode_len_mean: 268.37
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.6836999999999858
  episode_reward_min: -4.099999999999957
  episodes_this_iter: 3
  episodes_total: 2300
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.835810410541303e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.8911317421330346
          entropy_coeff: 0.009999999999999998
          kl: 0.10215281350732713
          policy_loss: -0.04628100684947438
          total_loss: -0.04615639688240157
          vf_explained_var: -0.029849862679839134
          vf_loss: 0.00902690274330477
    num_agent_steps_sampled: 620000
    num_agent_steps_trained: 620000
    num_steps_sampled: 620000
    num_steps_trained: 620000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,620,15856.6,620000,-2.6837,-2.07,-4.1,268.37


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 621000
  custom_metrics: {}
  date: 2021-11-05_16-56-50
  done: false
  episode_len_mean: 266.55
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.665499999999987
  episode_reward_min: -4.099999999999957
  episodes_this_iter: 4
  episodes_total: 2304
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001325371561581195
          cur_lr: 5.000000000000001e-05
          entropy: 0.6508737352159288
          entropy_coeff: 0.009999999999999998
          kl: 0.01114700234501811
          policy_loss: -0.08250832855701447
          total_loss: -0.07624964345660475
          vf_explained_var: 0.48705753684043884
          vf_loss: 0.012765948464059167
    num_agent_steps_sampled: 621000
    num_agent_steps_trained: 621000
    num_steps_sampled: 621000
    num_steps_trained: 621000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,621,15882,621000,-2.6655,-2.07,-4.1,266.55


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 622000
  custom_metrics: {}
  date: 2021-11-05_16-57-15
  done: false
  episode_len_mean: 265.5
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.6549999999999874
  episode_reward_min: -4.099999999999957
  episodes_this_iter: 4
  episodes_total: 2308
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001325371561581195
          cur_lr: 5.000000000000001e-05
          entropy: 1.0506583405865564
          entropy_coeff: 0.009999999999999998
          kl: 0.0785771257239953
          policy_loss: -0.05005697732170423
          total_loss: -0.04578963658875889
          vf_explained_var: 0.25152674317359924
          vf_loss: 0.014763511375834544
    num_agent_steps_sampled: 622000
    num_agent_steps_trained: 622000
    num_steps_sampled: 622000
    num_steps_trained: 622000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,622,15907.2,622000,-2.655,-2.07,-4.1,265.5




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 623000
  custom_metrics: {}
  date: 2021-11-05_16-57-55
  done: false
  episode_len_mean: 264.53
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.6452999999999864
  episode_reward_min: -4.099999999999957
  episodes_this_iter: 4
  episodes_total: 2312
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00019880573423717925
          cur_lr: 5.000000000000001e-05
          entropy: 1.014989309840732
          entropy_coeff: 0.009999999999999998
          kl: 0.04033268850551414
          policy_loss: -0.015289430196086565
          total_loss: -0.014767727173037
          vf_explained_var: 0.48819804191589355
          vf_loss: 0.010663578917996751
    num_agent_steps_sampled: 623000
    num_agent_steps_trained: 623000
    num_steps_sampled: 623000
    num_steps_trained: 623000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,623,15946.7,623000,-2.6453,-2.07,-4.1,264.53


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 624000
  custom_metrics: {}
  date: 2021-11-05_16-58-16
  done: false
  episode_len_mean: 265.21
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.6520999999999875
  episode_reward_min: -4.099999999999957
  episodes_this_iter: 3
  episodes_total: 2315
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0002982086013557689
          cur_lr: 5.000000000000001e-05
          entropy: 1.4504418743981256
          entropy_coeff: 0.009999999999999998
          kl: 0.060381100220162486
          policy_loss: -0.006882265210151672
          total_loss: -0.013842676248815324
          vf_explained_var: 0.550926923751831
          vf_loss: 0.007526002475707274
    num_agent_steps_sampled: 624000
    num_agent_steps_trained: 624000
    num_steps_sampled: 624000
    num_steps_trained: 624000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,624,15967.7,624000,-2.6521,-2.07,-4.1,265.21


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 625000
  custom_metrics: {}
  date: 2021-11-05_16-58-41
  done: false
  episode_len_mean: 263.86
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.638599999999988
  episode_reward_min: -4.099999999999957
  episodes_this_iter: 4
  episodes_total: 2319
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0004473129020336535
          cur_lr: 5.000000000000001e-05
          entropy: 0.5953181326389313
          entropy_coeff: 0.009999999999999998
          kl: 0.027325204420431407
          policy_loss: 0.014165713969204161
          total_loss: 0.01731161955330107
          vf_explained_var: 0.7241842746734619
          vf_loss: 0.009086866438802745
    num_agent_steps_sampled: 625000
    num_agent_steps_trained: 625000
    num_steps_sampled: 625000
    num_steps_trained: 625000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,625,15993,625000,-2.6386,-2.07,-4.1,263.86


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 626000
  custom_metrics: {}
  date: 2021-11-05_16-59-07
  done: false
  episode_len_mean: 262.27
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.622699999999988
  episode_reward_min: -4.099999999999957
  episodes_this_iter: 4
  episodes_total: 2323
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0006709693530504799
          cur_lr: 5.000000000000001e-05
          entropy: 0.4837418297926585
          entropy_coeff: 0.009999999999999998
          kl: 0.025590351842219722
          policy_loss: 0.06432462342911296
          total_loss: 0.0675031539466646
          vf_explained_var: 0.6676846742630005
          vf_loss: 0.007998778210538958
    num_agent_steps_sampled: 626000
    num_agent_steps_trained: 626000
    num_steps_sampled: 626000
    num_steps_trained: 626000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,626,16018.7,626000,-2.6227,-2.07,-4.1,262.27


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 627000
  custom_metrics: {}
  date: 2021-11-05_16-59-31
  done: false
  episode_len_mean: 261.57
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.6156999999999875
  episode_reward_min: -4.099999999999957
  episodes_this_iter: 4
  episodes_total: 2327
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0010064540295757203
          cur_lr: 5.000000000000001e-05
          entropy: 0.5113052328427633
          entropy_coeff: 0.009999999999999998
          kl: 0.016417854246101864
          policy_loss: 0.042189987417724395
          total_loss: 0.04668420081337293
          vf_explained_var: 0.5133742690086365
          vf_loss: 0.009590744454827573
    num_agent_steps_sampled: 627000
    num_agent_steps_trained: 627000
    num_steps_sampled: 627000
    num_steps_trained: 627000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,627,16043.4,627000,-2.6157,-2.07,-4.1,261.57


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 628000
  custom_metrics: {}
  date: 2021-11-05_16-59-56
  done: false
  episode_len_mean: 261.67
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.616699999999988
  episode_reward_min: -4.099999999999957
  episodes_this_iter: 4
  episodes_total: 2331
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0010064540295757203
          cur_lr: 5.000000000000001e-05
          entropy: 0.5283113512727949
          entropy_coeff: 0.009999999999999998
          kl: 0.06985380358967454
          policy_loss: 0.017564129270613193
          total_loss: 0.021935362844831413
          vf_explained_var: 0.5827560424804688
          vf_loss: 0.009584041860782438
    num_agent_steps_sampled: 628000
    num_agent_steps_trained: 628000
    num_steps_sampled: 628000
    num_steps_trained: 628000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,628,16067.5,628000,-2.6167,-2.07,-4.1,261.67


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 629000
  custom_metrics: {}
  date: 2021-11-05_17-00-21
  done: false
  episode_len_mean: 260.81
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.608099999999988
  episode_reward_min: -4.099999999999957
  episodes_this_iter: 4
  episodes_total: 2335
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015096810443635804
          cur_lr: 5.000000000000001e-05
          entropy: 0.400971351729499
          entropy_coeff: 0.009999999999999998
          kl: 0.027867263190733776
          policy_loss: 0.031594229406780666
          total_loss: 0.04081958019071155
          vf_explained_var: 0.22079361975193024
          vf_loss: 0.013192995265126229
    num_agent_steps_sampled: 629000
    num_agent_steps_trained: 629000
    num_steps_sampled: 629000
    num_steps_trained: 629000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,629,16092.9,629000,-2.6081,-2.07,-4.1,260.81


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 630000
  custom_metrics: {}
  date: 2021-11-05_17-00-48
  done: false
  episode_len_mean: 259.26
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.5925999999999885
  episode_reward_min: -4.099999999999957
  episodes_this_iter: 4
  episodes_total: 2339
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0022645215665453703
          cur_lr: 5.000000000000001e-05
          entropy: 0.4143936084376441
          entropy_coeff: 0.009999999999999998
          kl: 0.0038122897449204473
          policy_loss: 0.03004817643927203
          total_loss: 0.03811981450352404
          vf_explained_var: 0.24501541256904602
          vf_loss: 0.012206943509065442
    num_agent_steps_sampled: 630000
    num_agent_steps_trained: 630000
    num_steps_sampled: 630000
    num_steps_trained: 630000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,630,16120,630000,-2.5926,-2.07,-4.1,259.26




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 631000
  custom_metrics: {}
  date: 2021-11-05_17-01-32
  done: false
  episode_len_mean: 256.12
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.5611999999999897
  episode_reward_min: -4.099999999999957
  episodes_this_iter: 4
  episodes_total: 2343
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0011322607832726852
          cur_lr: 5.000000000000001e-05
          entropy: 0.7686184442705578
          entropy_coeff: 0.009999999999999998
          kl: 0.008403746679581225
          policy_loss: -0.046827367900146376
          total_loss: -0.04226418924000528
          vf_explained_var: 0.37879258394241333
          vf_loss: 0.01223984963984953
    num_agent_steps_sampled: 631000
    num_agent_steps_trained: 631000
    num_steps_sampled: 631000
    num_steps_trained: 631000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,631,16163.8,631000,-2.5612,-2.07,-4.1,256.12


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 632000
  custom_metrics: {}
  date: 2021-11-05_17-01-58
  done: false
  episode_len_mean: 254.37
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.5436999999999896
  episode_reward_min: -4.099999999999957
  episodes_this_iter: 5
  episodes_total: 2348
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0011322607832726852
          cur_lr: 5.000000000000001e-05
          entropy: 0.47893285221523707
          entropy_coeff: 0.009999999999999998
          kl: 0.0055704643424456255
          policy_loss: -0.03829664575556914
          total_loss: -0.029415139845675894
          vf_explained_var: 0.3955903649330139
          vf_loss: 0.013664527227067284
    num_agent_steps_sampled: 632000
    num_agent_steps_trained: 632000
    num_steps_sampled: 632000
    num_steps_trained: 632000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,632,16190.2,632000,-2.5437,-2.07,-4.1,254.37


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 633000
  custom_metrics: {}
  date: 2021-11-05_17-02-24
  done: false
  episode_len_mean: 254.0
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.5399999999999894
  episode_reward_min: -4.099999999999957
  episodes_this_iter: 4
  episodes_total: 2352
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0011322607832726852
          cur_lr: 5.000000000000001e-05
          entropy: 0.7097230692704518
          entropy_coeff: 0.009999999999999998
          kl: 0.009007982354263946
          policy_loss: -0.008694437228971057
          total_loss: -0.0054702516231271954
          vf_explained_var: 0.49531131982803345
          vf_loss: 0.01031122062769201
    num_agent_steps_sampled: 633000
    num_agent_steps_trained: 633000
    num_steps_sampled: 633000
    num_steps_trained: 633000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,633,16215.3,633000,-2.54,-2.07,-4.1,254


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 634000
  custom_metrics: {}
  date: 2021-11-05_17-02-49
  done: false
  episode_len_mean: 252.62
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.52619999999999
  episode_reward_min: -4.099999999999957
  episodes_this_iter: 4
  episodes_total: 2356
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0011322607832726852
          cur_lr: 5.000000000000001e-05
          entropy: 0.4914813386069404
          entropy_coeff: 0.009999999999999998
          kl: 0.01513193067166859
          policy_loss: 0.03305012459556262
          total_loss: 0.0369645153482755
          vf_explained_var: 0.5932263135910034
          vf_loss: 0.008812070017059644
    num_agent_steps_sampled: 634000
    num_agent_steps_trained: 634000
    num_steps_sampled: 634000
    num_steps_trained: 634000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,634,16240.9,634000,-2.5262,-2.07,-4.1,252.62


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 635000
  custom_metrics: {}
  date: 2021-11-05_17-03-14
  done: false
  episode_len_mean: 252.24
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.52239999999999
  episode_reward_min: -4.099999999999957
  episodes_this_iter: 4
  episodes_total: 2360
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0011322607832726852
          cur_lr: 5.000000000000001e-05
          entropy: 0.5169103143943681
          entropy_coeff: 0.009999999999999998
          kl: 0.03057552102949346
          policy_loss: 0.00529106209675471
          total_loss: 0.011638825883467992
          vf_explained_var: 0.4640352129936218
          vf_loss: 0.01148224463686347
    num_agent_steps_sampled: 635000
    num_agent_steps_trained: 635000
    num_steps_sampled: 635000
    num_steps_trained: 635000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,635,16266,635000,-2.5224,-2.07,-4.1,252.24


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 636000
  custom_metrics: {}
  date: 2021-11-05_17-03-41
  done: false
  episode_len_mean: 251.3
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.5129999999999897
  episode_reward_min: -4.099999999999957
  episodes_this_iter: 4
  episodes_total: 2364
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0016983911749090283
          cur_lr: 5.000000000000001e-05
          entropy: 0.28592115624083414
          entropy_coeff: 0.009999999999999998
          kl: 0.02158537271356226
          policy_loss: 0.04101558807823393
          total_loss: 0.04770987104210589
          vf_explained_var: 0.41421568393707275
          vf_loss: 0.0095168377738446
    num_agent_steps_sampled: 636000
    num_agent_steps_trained: 636000
    num_steps_sampled: 636000
    num_steps_trained: 636000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,636,16292.3,636000,-2.513,-2.07,-4.1,251.3


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 637000
  custom_metrics: {}
  date: 2021-11-05_17-04-03
  done: false
  episode_len_mean: 251.4
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.51399999999999
  episode_reward_min: -4.099999999999957
  episodes_this_iter: 3
  episodes_total: 2367
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0025475867623635418
          cur_lr: 5.000000000000001e-05
          entropy: 0.4830689373943541
          entropy_coeff: 0.009999999999999998
          kl: 0.007827209527153971
          policy_loss: -0.08486301418807772
          total_loss: -0.07848533516128858
          vf_explained_var: 0.4154103696346283
          vf_loss: 0.011188425982577934
    num_agent_steps_sampled: 637000
    num_agent_steps_trained: 637000
    num_steps_sampled: 637000
    num_steps_trained: 637000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,637,16314.2,637000,-2.514,-2.07,-4.1,251.4




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 638000
  custom_metrics: {}
  date: 2021-11-05_17-04-41
  done: false
  episode_len_mean: 254.49
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.5448999999999895
  episode_reward_min: -4.239999999999954
  episodes_this_iter: 4
  episodes_total: 2371
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0025475867623635418
          cur_lr: 5.000000000000001e-05
          entropy: 0.8176084915796916
          entropy_coeff: 0.009999999999999998
          kl: 0.20868645762026503
          policy_loss: 0.008370447489950392
          total_loss: 0.017973870370123123
          vf_explained_var: 0.14382359385490417
          vf_loss: 0.017247861778984467
    num_agent_steps_sampled: 638000
    num_agent_steps_trained: 638000
    num_steps_sampled: 638000
    num_steps_trained: 638000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,638,16352.2,638000,-2.5449,-2.07,-4.24,254.49


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 639000
  custom_metrics: {}
  date: 2021-11-05_17-05-07
  done: false
  episode_len_mean: 254.35
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.5434999999999897
  episode_reward_min: -4.239999999999954
  episodes_this_iter: 4
  episodes_total: 2375
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0038213801435453133
          cur_lr: 5.000000000000001e-05
          entropy: 0.753906887769699
          entropy_coeff: 0.009999999999999998
          kl: 0.0584702802043239
          policy_loss: 0.021551790833473205
          total_loss: 0.027947857562038634
          vf_explained_var: 0.18199831247329712
          vf_loss: 0.01371170154048337
    num_agent_steps_sampled: 639000
    num_agent_steps_trained: 639000
    num_steps_sampled: 639000
    num_steps_trained: 639000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,639,16378.6,639000,-2.5435,-2.07,-4.24,254.35


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 640000
  custom_metrics: {}
  date: 2021-11-05_17-05-31
  done: false
  episode_len_mean: 255.39
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.55389999999999
  episode_reward_min: -4.239999999999954
  episodes_this_iter: 4
  episodes_total: 2379
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00573207021531797
          cur_lr: 5.000000000000001e-05
          entropy: 0.9419513437483046
          entropy_coeff: 0.009999999999999998
          kl: 0.03176629953417552
          policy_loss: -0.0087523243493504
          total_loss: -0.004053182320462333
          vf_explained_var: 0.29701030254364014
          vf_loss: 0.013936572335660457
    num_agent_steps_sampled: 640000
    num_agent_steps_trained: 640000
    num_steps_sampled: 640000
    num_steps_trained: 640000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,640,16402.6,640000,-2.5539,-2.07,-4.24,255.39


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 641000
  custom_metrics: {}
  date: 2021-11-05_17-05-56
  done: false
  episode_len_mean: 256.11
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.5610999999999886
  episode_reward_min: -4.239999999999954
  episodes_this_iter: 3
  episodes_total: 2382
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.008598105322976952
          cur_lr: 5.000000000000001e-05
          entropy: 0.8320238229301241
          entropy_coeff: 0.009999999999999998
          kl: 0.018279264143855802
          policy_loss: -0.11056375495261617
          total_loss: -0.10589034284154573
          vf_explained_var: 0.3472868800163269
          vf_loss: 0.012836480533911123
    num_agent_steps_sampled: 641000
    num_agent_steps_trained: 641000
    num_steps_sampled: 641000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,641,16427.6,641000,-2.5611,-2.19,-4.24,256.11


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 642000
  custom_metrics: {}
  date: 2021-11-05_17-06-22
  done: false
  episode_len_mean: 256.29
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.5628999999999893
  episode_reward_min: -4.239999999999954
  episodes_this_iter: 5
  episodes_total: 2387
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.008598105322976952
          cur_lr: 5.000000000000001e-05
          entropy: 0.8647695302963256
          entropy_coeff: 0.009999999999999998
          kl: 0.007081474967969825
          policy_loss: -0.04476247090432379
          total_loss: -0.04118698032365905
          vf_explained_var: 0.41931673884391785
          vf_loss: 0.012162299417994089
    num_agent_steps_sampled: 642000
    num_agent_steps_trained: 642000
    num_steps_sampled: 642000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,642,16453.1,642000,-2.5629,-2.19,-4.24,256.29


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 643000
  custom_metrics: {}
  date: 2021-11-05_17-06-47
  done: false
  episode_len_mean: 256.08
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.560799999999989
  episode_reward_min: -4.239999999999954
  episodes_this_iter: 4
  episodes_total: 2391
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.008598105322976952
          cur_lr: 5.000000000000001e-05
          entropy: 0.6689934776888953
          entropy_coeff: 0.009999999999999998
          kl: 0.009986943491239074
          policy_loss: -0.01969835145605935
          total_loss: -0.015574523392650816
          vf_explained_var: 0.48780369758605957
          vf_loss: 0.010727895827343067
    num_agent_steps_sampled: 643000
    num_agent_steps_trained: 643000
    num_steps_sampled: 643000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,643,16478.7,643000,-2.5608,-2.19,-4.24,256.08


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 644000
  custom_metrics: {}
  date: 2021-11-05_17-07-13
  done: false
  episode_len_mean: 255.22
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.5521999999999894
  episode_reward_min: -4.239999999999954
  episodes_this_iter: 4
  episodes_total: 2395
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.008598105322976952
          cur_lr: 5.000000000000001e-05
          entropy: 0.580597859289911
          entropy_coeff: 0.009999999999999998
          kl: 0.025428199700262156
          policy_loss: -0.0230453304739462
          total_loss: -0.01936935339536932
          vf_explained_var: 0.5338220596313477
          vf_loss: 0.009263321633140247
    num_agent_steps_sampled: 644000
    num_agent_steps_trained: 644000
    num_steps_sampled: 644000
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,644,16504.4,644000,-2.5522,-2.19,-4.24,255.22


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 645000
  custom_metrics: {}
  date: 2021-11-05_17-07-34
  done: false
  episode_len_mean: 257.44
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.574399999999989
  episode_reward_min: -4.239999999999954
  episodes_this_iter: 3
  episodes_total: 2398
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012897157984465429
          cur_lr: 5.000000000000001e-05
          entropy: 1.1819599270820618
          entropy_coeff: 0.009999999999999998
          kl: 0.01215986872963231
          policy_loss: 0.027378929654757182
          total_loss: 0.026528014408217535
          vf_explained_var: 0.18199379742145538
          vf_loss: 0.010811853492891208
    num_agent_steps_sampled: 645000
    num_agent_steps_trained: 645000
    num_steps_sampled: 645000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,645,16525.9,645000,-2.5744,-2.19,-4.24,257.44




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 646000
  custom_metrics: {}
  date: 2021-11-05_17-08-14
  done: false
  episode_len_mean: 257.0
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.569999999999989
  episode_reward_min: -4.239999999999954
  episodes_this_iter: 3
  episodes_total: 2401
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012897157984465429
          cur_lr: 5.000000000000001e-05
          entropy: 1.02252250512441
          entropy_coeff: 0.009999999999999998
          kl: 0.012134171699089712
          policy_loss: -0.10742987154258622
          total_loss: -0.10431607721580399
          vf_explained_var: 0.20378507673740387
          vf_loss: 0.013182521590756046
    num_agent_steps_sampled: 646000
    num_agent_steps_trained: 646000
    num_steps_sampled: 646000
    num_steps_trained: 64

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,646,16565.5,646000,-2.57,-2.19,-4.24,257


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 647000
  custom_metrics: {}
  date: 2021-11-05_17-08-36
  done: false
  episode_len_mean: 258.46
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.5845999999999885
  episode_reward_min: -4.239999999999954
  episodes_this_iter: 4
  episodes_total: 2405
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012897157984465429
          cur_lr: 5.000000000000001e-05
          entropy: 1.08480603562461
          entropy_coeff: 0.009999999999999998
          kl: 0.03910633999324465
          policy_loss: 0.014760348863071865
          total_loss: 0.016733075843917
          vf_explained_var: 0.2656315267086029
          vf_loss: 0.012316426784834928
    num_agent_steps_sampled: 647000
    num_agent_steps_trained: 647000
    num_steps_sampled: 647000
    num_steps_trained: 64700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,647,16587.8,647000,-2.5846,-2.19,-4.24,258.46


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 648000
  custom_metrics: {}
  date: 2021-11-05_17-08-59
  done: false
  episode_len_mean: 259.22
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.5921999999999885
  episode_reward_min: -4.239999999999954
  episodes_this_iter: 3
  episodes_total: 2408
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.019345736976698138
          cur_lr: 5.000000000000001e-05
          entropy: 1.034663995769289
          entropy_coeff: 0.009999999999999998
          kl: 0.021035621555646443
          policy_loss: -0.07879360897673501
          total_loss: -0.07544594340854222
          vf_explained_var: 0.1543232798576355
          vf_loss: 0.013287360909291439
    num_agent_steps_sampled: 648000
    num_agent_steps_trained: 648000
    num_steps_sampled: 648000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,648,16610.6,648000,-2.5922,-2.19,-4.24,259.22


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 649000
  custom_metrics: {}
  date: 2021-11-05_17-09-20
  done: false
  episode_len_mean: 260.51
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.6050999999999878
  episode_reward_min: -4.239999999999954
  episodes_this_iter: 4
  episodes_total: 2412
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0290186054650472
          cur_lr: 5.000000000000001e-05
          entropy: 1.190970273150338
          entropy_coeff: 0.009999999999999998
          kl: 0.02572182088608154
          policy_loss: 0.019562242676814397
          total_loss: 0.02110141134924359
          vf_explained_var: 0.25649625062942505
          vf_loss: 0.01270246065946089
    num_agent_steps_sampled: 649000
    num_agent_steps_trained: 649000
    num_steps_sampled: 649000
    num_steps_trained: 6490

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,649,16631.8,649000,-2.6051,-2.19,-4.24,260.51


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 650000
  custom_metrics: {}
  date: 2021-11-05_17-09-45
  done: false
  episode_len_mean: 258.99
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.5898999999999885
  episode_reward_min: -4.239999999999954
  episodes_this_iter: 3
  episodes_total: 2415
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04352790819757081
          cur_lr: 5.000000000000001e-05
          entropy: 1.0924233615398407
          entropy_coeff: 0.009999999999999998
          kl: 0.023608964163065908
          policy_loss: 0.01911749392747879
          total_loss: 0.01892002796133359
          vf_explained_var: 0.2934190034866333
          vf_loss: 0.00969911987065441
    num_agent_steps_sampled: 650000
    num_agent_steps_trained: 650000
    num_steps_sampled: 650000
    num_steps_trained: 650

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,650,16656.2,650000,-2.5899,-2.19,-4.24,258.99


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 651000
  custom_metrics: {}
  date: 2021-11-05_17-10-10
  done: false
  episode_len_mean: 258.5
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.5849999999999893
  episode_reward_min: -4.239999999999954
  episodes_this_iter: 4
  episodes_total: 2419
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06529186229635621
          cur_lr: 5.000000000000001e-05
          entropy: 1.0546279218461778
          entropy_coeff: 0.009999999999999998
          kl: 0.014435172193695204
          policy_loss: -0.10349508060349358
          total_loss: -0.09722839602165752
          vf_explained_var: 0.1810104101896286
          vf_loss: 0.01587046446899573
    num_agent_steps_sampled: 651000
    num_agent_steps_trained: 651000
    num_steps_sampled: 651000
    num_steps_trained: 65

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,651,16681,651000,-2.585,-2.19,-4.24,258.5


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 652000
  custom_metrics: {}
  date: 2021-11-05_17-10-32
  done: false
  episode_len_mean: 260.77
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.6076999999999884
  episode_reward_min: -4.239999999999954
  episodes_this_iter: 4
  episodes_total: 2423
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06529186229635621
          cur_lr: 5.000000000000001e-05
          entropy: 1.2861577782366012
          entropy_coeff: 0.009999999999999998
          kl: 0.016408028126476716
          policy_loss: 0.04584860892759429
          total_loss: 0.04342445619404316
          vf_explained_var: 0.4774723947048187
          vf_loss: 0.009366115322336555
    num_agent_steps_sampled: 652000
    num_agent_steps_trained: 652000
    num_steps_sampled: 652000
    num_steps_trained: 65

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,652,16702.7,652000,-2.6077,-2.19,-4.24,260.77


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 653000
  custom_metrics: {}
  date: 2021-11-05_17-10-55
  done: false
  episode_len_mean: 261.42
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.614199999999988
  episode_reward_min: -4.239999999999954
  episodes_this_iter: 3
  episodes_total: 2426
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06529186229635621
          cur_lr: 5.000000000000001e-05
          entropy: 0.9697242683834499
          entropy_coeff: 0.009999999999999998
          kl: 0.01271470758382994
          policy_loss: -0.0983857509162691
          total_loss: -0.09370082169771195
          vf_explained_var: 0.1610213965177536
          vf_loss: 0.01355200307443738
    num_agent_steps_sampled: 653000
    num_agent_steps_trained: 653000
    num_steps_sampled: 653000
    num_steps_trained: 6530

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,653,16725.9,653000,-2.6142,-2.19,-4.24,261.42




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 654000
  custom_metrics: {}
  date: 2021-11-05_17-11-34
  done: false
  episode_len_mean: 262.31
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.623099999999988
  episode_reward_min: -4.239999999999954
  episodes_this_iter: 4
  episodes_total: 2430
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06529186229635621
          cur_lr: 5.000000000000001e-05
          entropy: 1.3653392434120177
          entropy_coeff: 0.009999999999999998
          kl: 0.03815102581325576
          policy_loss: 0.004907790074745814
          total_loss: 0.005510778145657646
          vf_explained_var: 0.14534617960453033
          vf_loss: 0.011765429584516419
    num_agent_steps_sampled: 654000
    num_agent_steps_trained: 654000
    num_steps_sampled: 654000
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,654,16765.1,654000,-2.6231,-2.19,-4.24,262.31


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 655000
  custom_metrics: {}
  date: 2021-11-05_17-11-57
  done: false
  episode_len_mean: 262.91
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.629099999999988
  episode_reward_min: -4.239999999999954
  episodes_this_iter: 3
  episodes_total: 2433
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09793779344453435
          cur_lr: 5.000000000000001e-05
          entropy: 1.1892102970017326
          entropy_coeff: 0.009999999999999998
          kl: 0.0131052159985005
          policy_loss: -0.07604664721422726
          total_loss: -0.07419451835254828
          vf_explained_var: 0.05899704247713089
          vf_loss: 0.012460734550323751
    num_agent_steps_sampled: 655000
    num_agent_steps_trained: 655000
    num_steps_sampled: 655000
    num_steps_trained: 65

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,655,16788.5,655000,-2.6291,-2.19,-4.24,262.91


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 656000
  custom_metrics: {}
  date: 2021-11-05_17-12-17
  done: false
  episode_len_mean: 265.86
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.658599999999987
  episode_reward_min: -4.239999999999954
  episodes_this_iter: 3
  episodes_total: 2436
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09793779344453435
          cur_lr: 5.000000000000001e-05
          entropy: 1.4200284017456903
          entropy_coeff: 0.009999999999999998
          kl: 0.0139112862255559
          policy_loss: -0.12421758605374231
          total_loss: -0.1280266520049837
          vf_explained_var: 0.40709513425827026
          vf_loss: 0.009028779761865734
    num_agent_steps_sampled: 656000
    num_agent_steps_trained: 656000
    num_steps_sampled: 656000
    num_steps_trained: 656

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,656,16808.5,656000,-2.6586,-2.19,-4.24,265.86


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 657000
  custom_metrics: {}
  date: 2021-11-05_17-12-39
  done: false
  episode_len_mean: 268.55
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.685499999999987
  episode_reward_min: -4.239999999999954
  episodes_this_iter: 4
  episodes_total: 2440
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09793779344453435
          cur_lr: 5.000000000000001e-05
          entropy: 1.2463019914097255
          entropy_coeff: 0.009999999999999998
          kl: 0.013048996836414833
          policy_loss: 0.008453949458069272
          total_loss: 0.009676494118240144
          vf_explained_var: 0.29994529485702515
          vf_loss: 0.012407574399063984
    num_agent_steps_sampled: 657000
    num_agent_steps_trained: 657000
    num_steps_sampled: 657000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,657,16830.1,657000,-2.6855,-2.19,-4.24,268.55


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 658000
  custom_metrics: {}
  date: 2021-11-05_17-13-02
  done: false
  episode_len_mean: 269.27
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.6926999999999865
  episode_reward_min: -4.239999999999954
  episodes_this_iter: 3
  episodes_total: 2443
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09793779344453435
          cur_lr: 5.000000000000001e-05
          entropy: 1.175245339340634
          entropy_coeff: 0.009999999999999998
          kl: 0.016604142492766902
          policy_loss: -0.062264579120609495
          total_loss: -0.05819290892945395
          vf_explained_var: 0.2368878275156021
          vf_loss: 0.014197952734927336
    num_agent_steps_sampled: 658000
    num_agent_steps_trained: 658000
    num_steps_sampled: 658000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,658,16853.3,658000,-2.6927,-2.19,-4.24,269.27


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 659000
  custom_metrics: {}
  date: 2021-11-05_17-13-26
  done: false
  episode_len_mean: 271.2
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.7119999999999864
  episode_reward_min: -4.239999999999954
  episodes_this_iter: 4
  episodes_total: 2447
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09793779344453435
          cur_lr: 5.000000000000001e-05
          entropy: 1.2450274844964346
          entropy_coeff: 0.009999999999999998
          kl: 0.010646359583765353
          policy_loss: 0.01058628617061509
          total_loss: 0.01093634929921892
          vf_explained_var: 0.27715831995010376
          vf_loss: 0.011757658432341285
    num_agent_steps_sampled: 659000
    num_agent_steps_trained: 659000
    num_steps_sampled: 659000
    num_steps_trained: 65

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,659,16876.5,659000,-2.712,-2.19,-4.24,271.2


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 660000
  custom_metrics: {}
  date: 2021-11-05_17-13-51
  done: false
  episode_len_mean: 270.99
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.709899999999986
  episode_reward_min: -4.239999999999954
  episodes_this_iter: 4
  episodes_total: 2451
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09793779344453435
          cur_lr: 5.000000000000001e-05
          entropy: 0.9517012589507633
          entropy_coeff: 0.009999999999999998
          kl: 0.013128755113270626
          policy_loss: -0.0017722754428784052
          total_loss: 0.003808107144302792
          vf_explained_var: 0.12658260762691498
          vf_loss: 0.013811593192319076
    num_agent_steps_sampled: 660000
    num_agent_steps_trained: 660000
    num_steps_sampled: 660000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,660,16902.2,660000,-2.7099,-2.19,-4.24,270.99


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 661000
  custom_metrics: {}
  date: 2021-11-05_17-14-15
  done: false
  episode_len_mean: 271.98
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.7197999999999856
  episode_reward_min: -4.239999999999954
  episodes_this_iter: 4
  episodes_total: 2455
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09793779344453435
          cur_lr: 5.000000000000001e-05
          entropy: 1.351986092991299
          entropy_coeff: 0.009999999999999998
          kl: 0.012789901906931552
          policy_loss: 0.020242217224505213
          total_loss: 0.0211981561448839
          vf_explained_var: 0.2193790078163147
          vf_loss: 0.013223184562391705
    num_agent_steps_sampled: 661000
    num_agent_steps_trained: 661000
    num_steps_sampled: 661000
    num_steps_trained: 661

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,661,16926,661000,-2.7198,-2.19,-4.24,271.98


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 662000
  custom_metrics: {}
  date: 2021-11-05_17-14-37
  done: false
  episode_len_mean: 273.98
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.7397999999999856
  episode_reward_min: -4.239999999999954
  episodes_this_iter: 3
  episodes_total: 2458
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09793779344453435
          cur_lr: 5.000000000000001e-05
          entropy: 1.6405842608875698
          entropy_coeff: 0.009999999999999998
          kl: 0.013251442188205212
          policy_loss: -0.015255408651298947
          total_loss: -0.02138683083984587
          vf_explained_var: 0.22544212639331818
          vf_loss: 0.008976605888326756
    num_agent_steps_sampled: 662000
    num_agent_steps_trained: 662000
    num_steps_sampled: 662000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,662,16947.7,662000,-2.7398,-2.19,-4.24,273.98




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 663000
  custom_metrics: {}
  date: 2021-11-05_17-15-18
  done: false
  episode_len_mean: 275.07
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.750699999999985
  episode_reward_min: -4.239999999999954
  episodes_this_iter: 4
  episodes_total: 2462
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09793779344453435
          cur_lr: 5.000000000000001e-05
          entropy: 1.547304399145974
          entropy_coeff: 0.009999999999999998
          kl: 0.021227397185696578
          policy_loss: -0.005359839896361033
          total_loss: -0.008222417409221331
          vf_explained_var: 0.5029218196868896
          vf_loss: 0.010531501337471936
    num_agent_steps_sampled: 663000
    num_agent_steps_trained: 663000
    num_steps_sampled: 663000
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,663,16988.7,663000,-2.7507,-2.1,-4.24,275.07


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 664000
  custom_metrics: {}
  date: 2021-11-05_17-15-41
  done: false
  episode_len_mean: 276.12
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.7611999999999846
  episode_reward_min: -4.239999999999954
  episodes_this_iter: 3
  episodes_total: 2465
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1469066901668015
          cur_lr: 5.000000000000001e-05
          entropy: 1.220773892932468
          entropy_coeff: 0.009999999999999998
          kl: 0.011815488722116008
          policy_loss: -0.10546485487785605
          total_loss: -0.10797331900232368
          vf_explained_var: 0.6640098094940186
          vf_loss: 0.007963501998326844
    num_agent_steps_sampled: 664000
    num_agent_steps_trained: 664000
    num_steps_sampled: 664000
    num_steps_trained: 664

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,664,17011.6,664000,-2.7612,-2.1,-4.24,276.12


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 665000
  custom_metrics: {}
  date: 2021-11-05_17-16-05
  done: false
  episode_len_mean: 273.39
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.7338999999999856
  episode_reward_min: -3.5899999999999674
  episodes_this_iter: 4
  episodes_total: 2469
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1469066901668015
          cur_lr: 5.000000000000001e-05
          entropy: 1.103129678302341
          entropy_coeff: 0.009999999999999998
          kl: 0.013987012956935432
          policy_loss: -0.05346471808022923
          total_loss: -0.05312731650968393
          vf_explained_var: 0.5227300524711609
          vf_loss: 0.009313910395010477
    num_agent_steps_sampled: 665000
    num_agent_steps_trained: 665000
    num_steps_sampled: 665000
    num_steps_trained: 66

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,665,17035.9,665000,-2.7339,-2.1,-3.59,273.39


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 666000
  custom_metrics: {}
  date: 2021-11-05_17-16-29
  done: false
  episode_len_mean: 274.1
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.7409999999999854
  episode_reward_min: -3.5899999999999674
  episodes_this_iter: 4
  episodes_total: 2473
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1469066901668015
          cur_lr: 5.000000000000001e-05
          entropy: 1.240380542145835
          entropy_coeff: 0.009999999999999998
          kl: 0.02052930094398144
          policy_loss: 0.0026673808693885802
          total_loss: 0.003790818651517232
          vf_explained_var: 0.4331355690956116
          vf_loss: 0.01051135307384862
    num_agent_steps_sampled: 666000
    num_agent_steps_trained: 666000
    num_steps_sampled: 666000
    num_steps_trained: 6660

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,666,17060.1,666000,-2.741,-2.1,-3.59,274.1


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 667000
  custom_metrics: {}
  date: 2021-11-05_17-16-54
  done: false
  episode_len_mean: 273.61
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.7360999999999853
  episode_reward_min: -3.5899999999999674
  episodes_this_iter: 4
  episodes_total: 2477
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.22036003525020223
          cur_lr: 5.000000000000001e-05
          entropy: 0.9565401706430647
          entropy_coeff: 0.009999999999999998
          kl: 0.012140511014283106
          policy_loss: -0.011500351896716488
          total_loss: -0.008410319768720203
          vf_explained_var: 0.37707990407943726
          vf_loss: 0.009980150224227045
    num_agent_steps_sampled: 667000
    num_agent_steps_trained: 667000
    num_steps_sampled: 667000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,667,17085.2,667000,-2.7361,-2.1,-3.59,273.61


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 668000
  custom_metrics: {}
  date: 2021-11-05_17-17-21
  done: false
  episode_len_mean: 272.94
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.729399999999986
  episode_reward_min: -3.5899999999999674
  episodes_this_iter: 4
  episodes_total: 2481
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.22036003525020223
          cur_lr: 5.000000000000001e-05
          entropy: 0.9250631305906508
          entropy_coeff: 0.009999999999999998
          kl: 0.008930395568508784
          policy_loss: -0.056997082630793255
          total_loss: -0.05424772550662359
          vf_explained_var: 0.34020793437957764
          vf_loss: 0.010032082266277737
    num_agent_steps_sampled: 668000
    num_agent_steps_trained: 668000
    num_steps_sampled: 668000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,668,17111.8,668000,-2.7294,-2.1,-3.59,272.94


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 669000
  custom_metrics: {}
  date: 2021-11-05_17-17-46
  done: false
  episode_len_mean: 272.54
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.7253999999999854
  episode_reward_min: -3.5899999999999674
  episodes_this_iter: 4
  episodes_total: 2485
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.22036003525020223
          cur_lr: 5.000000000000001e-05
          entropy: 0.9547080980406867
          entropy_coeff: 0.009999999999999998
          kl: 0.010144853789097219
          policy_loss: -0.0704237008260356
          total_loss: -0.06703115436765883
          vf_explained_var: 0.3867892026901245
          vf_loss: 0.010704104540248711
    num_agent_steps_sampled: 669000
    num_agent_steps_trained: 669000
    num_steps_sampled: 669000
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,669,17136.8,669000,-2.7254,-2.1,-3.59,272.54




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 670000
  custom_metrics: {}
  date: 2021-11-05_17-18-30
  done: false
  episode_len_mean: 271.82
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.7181999999999853
  episode_reward_min: -3.5899999999999674
  episodes_this_iter: 5
  episodes_total: 2490
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.22036003525020223
          cur_lr: 5.000000000000001e-05
          entropy: 0.9448728395832909
          entropy_coeff: 0.009999999999999998
          kl: 0.003913909076134796
          policy_loss: -0.029799339837498134
          total_loss: -0.026749841703308953
          vf_explained_var: 0.33393043279647827
          vf_loss: 0.011635755188763142
    num_agent_steps_sampled: 670000
    num_agent_steps_trained: 670000
    num_steps_sampled: 670000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,670,17180.4,670000,-2.7182,-1.95,-3.59,271.82


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 671000
  custom_metrics: {}
  date: 2021-11-05_17-18-58
  done: false
  episode_len_mean: 271.68
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.716799999999986
  episode_reward_min: -3.5899999999999674
  episodes_this_iter: 4
  episodes_total: 2494
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11018001762510111
          cur_lr: 5.000000000000001e-05
          entropy: 0.9960471583737267
          entropy_coeff: 0.009999999999999998
          kl: 0.010941571049281467
          policy_loss: 0.044179953303602006
          total_loss: 0.04621973286072413
          vf_explained_var: 0.253149151802063
          vf_loss: 0.010794708112047777
    num_agent_steps_sampled: 671000
    num_agent_steps_trained: 671000
    num_steps_sampled: 671000
    num_steps_trained: 67

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,671,17208.5,671000,-2.7168,-1.95,-3.59,271.68


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 672000
  custom_metrics: {}
  date: 2021-11-05_17-19-22
  done: false
  episode_len_mean: 270.33
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.7032999999999867
  episode_reward_min: -3.5899999999999674
  episodes_this_iter: 4
  episodes_total: 2498
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11018001762510111
          cur_lr: 5.000000000000001e-05
          entropy: 1.2353937837812636
          entropy_coeff: 0.009999999999999998
          kl: 0.027651015538887335
          policy_loss: 0.024150020711951787
          total_loss: 0.025709537085559633
          vf_explained_var: 0.27192792296409607
          vf_loss: 0.010866860662483506
    num_agent_steps_sampled: 672000
    num_agent_steps_trained: 672000
    num_steps_sampled: 672000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,672,17232.4,672000,-2.7033,-1.95,-3.59,270.33


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 673000
  custom_metrics: {}
  date: 2021-11-05_17-19-45
  done: false
  episode_len_mean: 270.37
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.7036999999999862
  episode_reward_min: -3.5899999999999674
  episodes_this_iter: 3
  episodes_total: 2501
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16527002643765168
          cur_lr: 5.000000000000001e-05
          entropy: 1.133550112777286
          entropy_coeff: 0.009999999999999998
          kl: 0.014452993586410222
          policy_loss: -0.07071782963143455
          total_loss: -0.06865663925806681
          vf_explained_var: 0.1392643302679062
          vf_loss: 0.011008043977845874
    num_agent_steps_sampled: 673000
    num_agent_steps_trained: 673000
    num_steps_sampled: 673000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,673,17255.2,673000,-2.7037,-1.95,-3.59,270.37


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 674000
  custom_metrics: {}
  date: 2021-11-05_17-20-11
  done: false
  episode_len_mean: 267.77
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.677699999999987
  episode_reward_min: -3.569999999999968
  episodes_this_iter: 5
  episodes_total: 2506
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16527002643765168
          cur_lr: 5.000000000000001e-05
          entropy: 0.9971931431028578
          entropy_coeff: 0.009999999999999998
          kl: 0.011943246391050641
          policy_loss: -0.046179631021287706
          total_loss: -0.039710964593622416
          vf_explained_var: 0.17602010071277618
          vf_loss: 0.01446673795580864
    num_agent_steps_sampled: 674000
    num_agent_steps_trained: 674000
    num_steps_sampled: 674000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,674,17281.1,674000,-2.6777,-1.95,-3.57,267.77


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 675000
  custom_metrics: {}
  date: 2021-11-05_17-20-34
  done: false
  episode_len_mean: 267.0
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.6699999999999857
  episode_reward_min: -3.569999999999968
  episodes_this_iter: 3
  episodes_total: 2509
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16527002643765168
          cur_lr: 5.000000000000001e-05
          entropy: 0.9877114964856042
          entropy_coeff: 0.009999999999999998
          kl: 0.008683444018013731
          policy_loss: 0.03413938581943512
          total_loss: 0.034841807848877374
          vf_explained_var: 0.08314277976751328
          vf_loss: 0.009144423045735392
    num_agent_steps_sampled: 675000
    num_agent_steps_trained: 675000
    num_steps_sampled: 675000
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,675,17304.7,675000,-2.67,-1.95,-3.57,267


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 676000
  custom_metrics: {}
  date: 2021-11-05_17-20-59
  done: false
  episode_len_mean: 264.75
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.6474999999999875
  episode_reward_min: -3.569999999999968
  episodes_this_iter: 4
  episodes_total: 2513
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16527002643765168
          cur_lr: 5.000000000000001e-05
          entropy: 1.0583548479610019
          entropy_coeff: 0.009999999999999998
          kl: 0.014163382055145017
          policy_loss: -0.09850252792239189
          total_loss: -0.09409637442893452
          vf_explained_var: 0.44301101565361023
          vf_loss: 0.01264891877459983
    num_agent_steps_sampled: 676000
    num_agent_steps_trained: 676000
    num_steps_sampled: 676000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,676,17329.3,676000,-2.6475,-1.95,-3.57,264.75


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 677000
  custom_metrics: {}
  date: 2021-11-05_17-21-23
  done: false
  episode_len_mean: 265.95
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.6594999999999875
  episode_reward_min: -3.569999999999968
  episodes_this_iter: 4
  episodes_total: 2517
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16527002643765168
          cur_lr: 5.000000000000001e-05
          entropy: 1.2458008567492167
          entropy_coeff: 0.009999999999999998
          kl: 0.022543144245755445
          policy_loss: 0.041672514751553535
          total_loss: 0.04388281889259815
          vf_explained_var: 0.27521270513534546
          vf_loss: 0.010942604899820354
    num_agent_steps_sampled: 677000
    num_agent_steps_trained: 677000
    num_steps_sampled: 677000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,677,17353.1,677000,-2.6595,-1.95,-3.57,265.95




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 678000
  custom_metrics: {}
  date: 2021-11-05_17-22-03
  done: false
  episode_len_mean: 265.61
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.656099999999987
  episode_reward_min: -3.569999999999968
  episodes_this_iter: 4
  episodes_total: 2521
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2479050396564775
          cur_lr: 5.000000000000001e-05
          entropy: 1.1711520771185557
          entropy_coeff: 0.009999999999999998
          kl: 0.009517658774497227
          policy_loss: 0.046449694616927044
          total_loss: 0.04892999575369888
          vf_explained_var: 0.370747834444046
          vf_loss: 0.011832348412523667
    num_agent_steps_sampled: 678000
    num_agent_steps_trained: 678000
    num_steps_sampled: 678000
    num_steps_trained: 6780

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,678,17393.1,678000,-2.6561,-1.95,-3.57,265.61


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 679000
  custom_metrics: {}
  date: 2021-11-05_17-22-26
  done: false
  episode_len_mean: 265.86
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.6585999999999865
  episode_reward_min: -3.569999999999968
  episodes_this_iter: 3
  episodes_total: 2524
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2479050396564775
          cur_lr: 5.000000000000001e-05
          entropy: 1.093488210439682
          entropy_coeff: 0.009999999999999998
          kl: 0.007852056992569266
          policy_loss: -0.08423460875120428
          total_loss: -0.0836240010129081
          vf_explained_var: 0.47431766986846924
          vf_loss: 0.009598925140582852
    num_agent_steps_sampled: 679000
    num_agent_steps_trained: 679000
    num_steps_sampled: 679000
    num_steps_trained: 67

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,679,17415.9,679000,-2.6586,-1.95,-3.57,265.86


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 680000
  custom_metrics: {}
  date: 2021-11-05_17-22-48
  done: false
  episode_len_mean: 265.61
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.6560999999999875
  episode_reward_min: -3.569999999999968
  episodes_this_iter: 4
  episodes_total: 2528
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2479050396564775
          cur_lr: 5.000000000000001e-05
          entropy: 1.3801385151015388
          entropy_coeff: 0.009999999999999998
          kl: 0.026518306061057394
          policy_loss: 0.04722795577512847
          total_loss: 0.05425762633482615
          vf_explained_var: 0.12148942053318024
          vf_loss: 0.014257033236531748
    num_agent_steps_sampled: 680000
    num_agent_steps_trained: 680000
    num_steps_sampled: 680000
    num_steps_trained: 68

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,680,17438.6,680000,-2.6561,-1.95,-3.57,265.61


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 681000
  custom_metrics: {}
  date: 2021-11-05_17-23-13
  done: false
  episode_len_mean: 264.52
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.645199999999987
  episode_reward_min: -3.569999999999968
  episodes_this_iter: 4
  episodes_total: 2532
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.37185755948471627
          cur_lr: 5.000000000000001e-05
          entropy: 1.3890385548273723
          entropy_coeff: 0.009999999999999998
          kl: 0.007168634045254052
          policy_loss: 0.041474436678820184
          total_loss: 0.04219115860760212
          vf_explained_var: 0.23850521445274353
          vf_loss: 0.011941398080024455
    num_agent_steps_sampled: 681000
    num_agent_steps_trained: 681000
    num_steps_sampled: 681000
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,681,17463.3,681000,-2.6452,-1.95,-3.57,264.52


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 682000
  custom_metrics: {}
  date: 2021-11-05_17-23-34
  done: false
  episode_len_mean: 264.41
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.644099999999988
  episode_reward_min: -3.569999999999968
  episodes_this_iter: 3
  episodes_total: 2535
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.37185755948471627
          cur_lr: 5.000000000000001e-05
          entropy: 1.6018058313263788
          entropy_coeff: 0.009999999999999998
          kl: 0.009251457552097856
          policy_loss: 0.04130426189965672
          total_loss: 0.036425859563880496
          vf_explained_var: 0.5056701898574829
          vf_loss: 0.0076994299041366
    num_agent_steps_sampled: 682000
    num_agent_steps_trained: 682000
    num_steps_sampled: 682000
    num_steps_trained: 6820

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,682,17484.5,682000,-2.6441,-1.95,-3.57,264.41


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 683000
  custom_metrics: {}
  date: 2021-11-05_17-23-57
  done: false
  episode_len_mean: 262.81
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.6280999999999874
  episode_reward_min: -3.569999999999968
  episodes_this_iter: 4
  episodes_total: 2539
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.37185755948471627
          cur_lr: 5.000000000000001e-05
          entropy: 1.3018475698100196
          entropy_coeff: 0.009999999999999998
          kl: 0.007944132973523525
          policy_loss: -0.050284039187762475
          total_loss: -0.04687469659580125
          vf_explained_var: 0.19834637641906738
          vf_loss: 0.013473732252087858
    num_agent_steps_sampled: 683000
    num_agent_steps_trained: 683000
    num_steps_sampled: 683000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,683,17507.7,683000,-2.6281,-1.95,-3.57,262.81


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 684000
  custom_metrics: {}
  date: 2021-11-05_17-24-23
  done: false
  episode_len_mean: 261.28
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.6127999999999876
  episode_reward_min: -3.569999999999968
  episodes_this_iter: 4
  episodes_total: 2543
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.37185755948471627
          cur_lr: 5.000000000000001e-05
          entropy: 0.9758242269357046
          entropy_coeff: 0.009999999999999998
          kl: 0.004649465534333786
          policy_loss: 0.07014871074093712
          total_loss: 0.07283034142520693
          vf_explained_var: 0.15940403938293457
          vf_loss: 0.010710931590033902
    num_agent_steps_sampled: 684000
    num_agent_steps_trained: 684000
    num_steps_sampled: 684000
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,684,17533.6,684000,-2.6128,-1.95,-3.57,261.28


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 685000
  custom_metrics: {}
  date: 2021-11-05_17-24-47
  done: false
  episode_len_mean: 261.15
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.611499999999988
  episode_reward_min: -3.569999999999968
  episodes_this_iter: 4
  episodes_total: 2547
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18592877974235814
          cur_lr: 5.000000000000001e-05
          entropy: 1.4026055210166508
          entropy_coeff: 0.009999999999999998
          kl: 0.013153287352122373
          policy_loss: -0.017651847667164274
          total_loss: -0.016760995735724767
          vf_explained_var: 0.4737913906574249
          vf_loss: 0.012471333394447963
    num_agent_steps_sampled: 685000
    num_agent_steps_trained: 685000
    num_steps_sampled: 685000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,685,17556.9,685000,-2.6115,-1.95,-3.57,261.15




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 686000
  custom_metrics: {}
  date: 2021-11-05_17-25-30
  done: false
  episode_len_mean: 260.71
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.6070999999999884
  episode_reward_min: -3.569999999999968
  episodes_this_iter: 4
  episodes_total: 2551
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18592877974235814
          cur_lr: 5.000000000000001e-05
          entropy: 1.076831019586987
          entropy_coeff: 0.009999999999999998
          kl: 0.013836917458414809
          policy_loss: 0.02372124567627907
          total_loss: 0.028245548655589422
          vf_explained_var: 0.21881192922592163
          vf_loss: 0.012719929828825924
    num_agent_steps_sampled: 686000
    num_agent_steps_trained: 686000
    num_steps_sampled: 686000
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,686,17600.6,686000,-2.6071,-1.95,-3.57,260.71


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 687000
  custom_metrics: {}
  date: 2021-11-05_17-25-53
  done: false
  episode_len_mean: 260.72
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.607199999999988
  episode_reward_min: -3.569999999999968
  episodes_this_iter: 3
  episodes_total: 2554
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18592877974235814
          cur_lr: 5.000000000000001e-05
          entropy: 1.4121779561042787
          entropy_coeff: 0.009999999999999998
          kl: 0.02755134649056801
          policy_loss: -0.04891208368870947
          total_loss: -0.04574321773317125
          vf_explained_var: 0.48786380887031555
          vf_loss: 0.012168055353686213
    num_agent_steps_sampled: 687000
    num_agent_steps_trained: 687000
    num_steps_sampled: 687000
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,687,17623.5,687000,-2.6072,-1.95,-3.57,260.72


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 688000
  custom_metrics: {}
  date: 2021-11-05_17-26-16
  done: false
  episode_len_mean: 260.34
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.603399999999988
  episode_reward_min: -3.5299999999999687
  episodes_this_iter: 4
  episodes_total: 2558
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2788931696135373
          cur_lr: 5.000000000000001e-05
          entropy: 1.4007988227738275
          entropy_coeff: 0.009999999999999998
          kl: 0.01242655707185452
          policy_loss: -0.024075980484485626
          total_loss: -0.026158520579338075
          vf_explained_var: 0.6189760565757751
          vf_loss: 0.008459767331886622
    num_agent_steps_sampled: 688000
    num_agent_steps_trained: 688000
    num_steps_sampled: 688000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,688,17646.3,688000,-2.6034,-1.95,-3.53,260.34


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 689000
  custom_metrics: {}
  date: 2021-11-05_17-26-41
  done: false
  episode_len_mean: 259.43
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.5942999999999885
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 2562
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2788931696135373
          cur_lr: 5.000000000000001e-05
          entropy: 1.1016704360644023
          entropy_coeff: 0.009999999999999998
          kl: 0.00949212555772421
          policy_loss: 0.027626173943281172
          total_loss: 0.03326147538092401
          vf_explained_var: 0.15441535413265228
          vf_loss: 0.014004716608259414
    num_agent_steps_sampled: 689000
    num_agent_steps_trained: 689000
    num_steps_sampled: 689000
    num_steps_trained: 689

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,689,17670.7,689000,-2.5943,-1.95,-3.48,259.43


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 690000
  custom_metrics: {}
  date: 2021-11-05_17-27-05
  done: false
  episode_len_mean: 258.42
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.584199999999988
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 2566
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2788931696135373
          cur_lr: 5.000000000000001e-05
          entropy: 1.0651574889818827
          entropy_coeff: 0.009999999999999998
          kl: 0.009388468991525695
          policy_loss: 0.005819077417254448
          total_loss: 0.00998991293211778
          vf_explained_var: 0.24359771609306335
          vf_loss: 0.012204033726205429
    num_agent_steps_sampled: 690000
    num_agent_steps_trained: 690000
    num_steps_sampled: 690000
    num_steps_trained: 690

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,690,17695.4,690000,-2.5842,-1.95,-3.48,258.42


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 691000
  custom_metrics: {}
  date: 2021-11-05_17-27-29
  done: false
  episode_len_mean: 258.7
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.586999999999989
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 2570
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2788931696135373
          cur_lr: 5.000000000000001e-05
          entropy: 1.1472856097751194
          entropy_coeff: 0.009999999999999998
          kl: 0.015417847099650809
          policy_loss: -0.0028528210603528553
          total_loss: 0.004984612473183208
          vf_explained_var: 0.03774317726492882
          vf_loss: 0.015010359966092639
    num_agent_steps_sampled: 691000
    num_agent_steps_trained: 691000
    num_steps_sampled: 691000
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,691,17718.9,691000,-2.587,-1.95,-3.48,258.7


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 692000
  custom_metrics: {}
  date: 2021-11-05_17-27-54
  done: false
  episode_len_mean: 258.22
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.5821999999999883
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 2574
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2788931696135373
          cur_lr: 5.000000000000001e-05
          entropy: 1.1103057364622753
          entropy_coeff: 0.009999999999999998
          kl: 0.006800418794123589
          policy_loss: 0.019485667927397622
          total_loss: 0.022325837777720556
          vf_explained_var: 0.27810585498809814
          vf_loss: 0.012046636454761029
    num_agent_steps_sampled: 692000
    num_agent_steps_trained: 692000
    num_steps_sampled: 692000
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,692,17744.3,692000,-2.5822,-1.95,-3.48,258.22


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 693000
  custom_metrics: {}
  date: 2021-11-05_17-28-19
  done: false
  episode_len_mean: 258.39
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.5838999999999883
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 2578
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2788931696135373
          cur_lr: 5.000000000000001e-05
          entropy: 1.2174434330728319
          entropy_coeff: 0.009999999999999998
          kl: 0.0074705557069070375
          policy_loss: -0.023548743625481924
          total_loss: -0.020790408303340276
          vf_explained_var: 0.31015098094940186
          vf_loss: 0.012849280941817495
    num_agent_steps_sampled: 693000
    num_agent_steps_trained: 693000
    num_steps_sampled: 693000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,693,17768.7,693000,-2.5839,-1.95,-3.48,258.39




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 694000
  custom_metrics: {}
  date: 2021-11-05_17-29-02
  done: false
  episode_len_mean: 258.68
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.586799999999988
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 2582
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2788931696135373
          cur_lr: 5.000000000000001e-05
          entropy: 1.0392375588417053
          entropy_coeff: 0.009999999999999998
          kl: 0.004948130035973009
          policy_loss: 0.01848942070371575
          total_loss: 0.018382069551282458
          vf_explained_var: 0.5770459771156311
          vf_loss: 0.008905024785134528
    num_agent_steps_sampled: 694000
    num_agent_steps_trained: 694000
    num_steps_sampled: 694000
    num_steps_trained: 6940

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,694,17812.1,694000,-2.5868,-1.95,-3.48,258.68


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 695000
  custom_metrics: {}
  date: 2021-11-05_17-29-27
  done: false
  episode_len_mean: 258.94
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.589399999999988
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 2586
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13944658480676864
          cur_lr: 5.000000000000001e-05
          entropy: 1.0460228906737434
          entropy_coeff: 0.009999999999999998
          kl: 0.01398949475517573
          policy_loss: 0.0404687208433946
          total_loss: 0.04472824682792028
          vf_explained_var: 0.3896828591823578
          vf_loss: 0.012768967025395896
    num_agent_steps_sampled: 695000
    num_agent_steps_trained: 695000
    num_steps_sampled: 695000
    num_steps_trained: 695000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,695,17836.5,695000,-2.5894,-1.95,-3.48,258.94


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 696000
  custom_metrics: {}
  date: 2021-11-05_17-29-50
  done: false
  episode_len_mean: 259.97
  episode_media: {}
  episode_reward_max: -1.9500000000000015
  episode_reward_mean: -2.5996999999999884
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 3
  episodes_total: 2589
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13944658480676864
          cur_lr: 5.000000000000001e-05
          entropy: 1.1463552461730109
          entropy_coeff: 0.009999999999999998
          kl: 0.012429598144262382
          policy_loss: -0.07537117401758829
          total_loss: -0.07366110260287921
          vf_explained_var: 0.29029178619384766
          vf_loss: 0.011440355258269443
    num_agent_steps_sampled: 696000
    num_agent_steps_trained: 696000
    num_steps_sampled: 696000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,696,17860.1,696000,-2.5997,-1.95,-3.48,259.97


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 697000
  custom_metrics: {}
  date: 2021-11-05_17-30-15
  done: false
  episode_len_mean: 261.03
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.6102999999999885
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 2593
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13944658480676864
          cur_lr: 5.000000000000001e-05
          entropy: 1.105130300256941
          entropy_coeff: 0.009999999999999998
          kl: 0.009849267846596671
          policy_loss: -0.09083489817049768
          total_loss: -0.08845000080764294
          vf_explained_var: 0.40248221158981323
          vf_loss: 0.012062750932657056
    num_agent_steps_sampled: 697000
    num_agent_steps_trained: 697000
    num_steps_sampled: 697000
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,697,17884.8,697000,-2.6103,-1.97,-3.48,261.03


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 698000
  custom_metrics: {}
  date: 2021-11-05_17-30-38
  done: false
  episode_len_mean: 262.21
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.6220999999999877
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 2597
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13944658480676864
          cur_lr: 5.000000000000001e-05
          entropy: 1.2033400535583496
          entropy_coeff: 0.009999999999999998
          kl: 0.011323181908748994
          policy_loss: -0.01030556278096305
          total_loss: -0.008682124316692352
          vf_explained_var: 0.2955724895000458
          vf_loss: 0.012077859323471784
    num_agent_steps_sampled: 698000
    num_agent_steps_trained: 698000
    num_steps_sampled: 698000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,698,17907.7,698000,-2.6221,-1.97,-3.48,262.21


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 699000
  custom_metrics: {}
  date: 2021-11-05_17-31-01
  done: false
  episode_len_mean: 261.65
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.6164999999999883
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 2601
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13944658480676864
          cur_lr: 5.000000000000001e-05
          entropy: 1.2248278458913167
          entropy_coeff: 0.009999999999999998
          kl: 0.010102020138589296
          policy_loss: 0.039854323036140865
          total_loss: 0.03946943605939547
          vf_explained_var: 0.3398503065109253
          vf_loss: 0.010454698408850365
    num_agent_steps_sampled: 699000
    num_agent_steps_trained: 699000
    num_steps_sampled: 699000
    num_steps_trained: 69

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,699,17931.2,699000,-2.6165,-1.97,-3.48,261.65


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 700000
  custom_metrics: {}
  date: 2021-11-05_17-31-23
  done: false
  episode_len_mean: 263.01
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.630099999999988
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 3
  episodes_total: 2604
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13944658480676864
          cur_lr: 5.000000000000001e-05
          entropy: 1.1880107561747233
          entropy_coeff: 0.009999999999999998
          kl: 0.011581852260427973
          policy_loss: 0.025661172345280647
          total_loss: 0.02303191882868608
          vf_explained_var: 0.3339095711708069
          vf_loss: 0.007635804431305991
    num_agent_steps_sampled: 700000
    num_agent_steps_trained: 700000
    num_steps_sampled: 700000
    num_steps_trained: 700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,700,17952.7,700000,-2.6301,-1.97,-3.48,263.01


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 701000
  custom_metrics: {}
  date: 2021-11-05_17-31-45
  done: false
  episode_len_mean: 264.77
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.6476999999999875
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 2608
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13944658480676864
          cur_lr: 5.000000000000001e-05
          entropy: 1.225756091541714
          entropy_coeff: 0.009999999999999998
          kl: 0.021397714759169597
          policy_loss: 0.020459971494144864
          total_loss: 0.02540437968240844
          vf_explained_var: 0.12087846547365189
          vf_loss: 0.014218131432102786
    num_agent_steps_sampled: 701000
    num_agent_steps_trained: 701000
    num_steps_sampled: 701000
    num_steps_trained: 70

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,701,17974.9,701000,-2.6477,-1.97,-3.48,264.77




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 702000
  custom_metrics: {}
  date: 2021-11-05_17-32-24
  done: false
  episode_len_mean: 265.63
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.6562999999999874
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 3
  episodes_total: 2611
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20916987721015298
          cur_lr: 5.000000000000001e-05
          entropy: 1.2802677366468642
          entropy_coeff: 0.009999999999999998
          kl: 0.017794042018261142
          policy_loss: 0.01983058750629425
          total_loss: 0.019913486970795525
          vf_explained_var: -0.29934924840927124
          vf_loss: 0.00916359558986086
    num_agent_steps_sampled: 702000
    num_agent_steps_trained: 702000
    num_steps_sampled: 702000
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,702,18014.1,702000,-2.6563,-1.97,-3.48,265.63


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 703000
  custom_metrics: {}
  date: 2021-11-05_17-32-48
  done: false
  episode_len_mean: 266.13
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.661299999999987
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 2615
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20916987721015298
          cur_lr: 5.000000000000001e-05
          entropy: 1.2625669598579408
          entropy_coeff: 0.009999999999999998
          kl: 0.012938923482649795
          policy_loss: 0.021511167536179224
          total_loss: 0.02540836524632242
          vf_explained_var: 0.21495845913887024
          vf_loss: 0.013816433275739353
    num_agent_steps_sampled: 703000
    num_agent_steps_trained: 703000
    num_steps_sampled: 703000
    num_steps_trained: 70

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,703,18037.8,703000,-2.6613,-1.97,-3.48,266.13


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 704000
  custom_metrics: {}
  date: 2021-11-05_17-33-10
  done: false
  episode_len_mean: 267.12
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.671199999999987
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 3
  episodes_total: 2618
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20916987721015298
          cur_lr: 5.000000000000001e-05
          entropy: 1.148199326462216
          entropy_coeff: 0.009999999999999998
          kl: 0.009741794456915226
          policy_loss: 0.0009351816442277696
          total_loss: 0.0010129147933589087
          vf_explained_var: 0.27366095781326294
          vf_loss: 0.009522040948892634
    num_agent_steps_sampled: 704000
    num_agent_steps_trained: 704000
    num_steps_sampled: 704000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,704,18060.1,704000,-2.6712,-1.97,-3.48,267.12


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 705000
  custom_metrics: {}
  date: 2021-11-05_17-33-35
  done: false
  episode_len_mean: 266.09
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.6608999999999874
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 2622
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20916987721015298
          cur_lr: 5.000000000000001e-05
          entropy: 1.2922113882170783
          entropy_coeff: 0.009999999999999998
          kl: 0.007334877414073778
          policy_loss: -0.0018363490700721741
          total_loss: 0.00023910303910573322
          vf_explained_var: 0.16716152429580688
          vf_loss: 0.013463327216191425
    num_agent_steps_sampled: 705000
    num_agent_steps_trained: 705000
    num_steps_sampled: 705000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,705,18084.2,705000,-2.6609,-1.97,-3.48,266.09


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 706000
  custom_metrics: {}
  date: 2021-11-05_17-33-59
  done: false
  episode_len_mean: 265.54
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.6553999999999873
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 2626
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20916987721015298
          cur_lr: 5.000000000000001e-05
          entropy: 1.1666796273655362
          entropy_coeff: 0.009999999999999998
          kl: 0.009376988785961278
          policy_loss: 0.005061784841948086
          total_loss: 0.008312066975567077
          vf_explained_var: 0.2916804850101471
          vf_loss: 0.012955695618357924
    num_agent_steps_sampled: 706000
    num_agent_steps_trained: 706000
    num_steps_sampled: 706000
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,706,18108.6,706000,-2.6554,-1.97,-3.48,265.54


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 707000
  custom_metrics: {}
  date: 2021-11-05_17-34-23
  done: false
  episode_len_mean: 265.12
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.6511999999999873
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 2630
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20916987721015298
          cur_lr: 5.000000000000001e-05
          entropy: 1.1160897109243604
          entropy_coeff: 0.009999999999999998
          kl: 0.0060776074384672266
          policy_loss: 0.021590768463081784
          total_loss: 0.022451683382193246
          vf_explained_var: 0.4410450756549835
          vf_loss: 0.010750562469992373
    num_agent_steps_sampled: 707000
    num_agent_steps_trained: 707000
    num_steps_sampled: 707000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,707,18133,707000,-2.6512,-1.97,-3.48,265.12


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 708000
  custom_metrics: {}
  date: 2021-11-05_17-34-49
  done: false
  episode_len_mean: 263.77
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.6376999999999873
  episode_reward_min: -3.46999999999997
  episodes_this_iter: 4
  episodes_total: 2634
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20916987721015298
          cur_lr: 5.000000000000001e-05
          entropy: 1.1039081904623242
          entropy_coeff: 0.009999999999999998
          kl: 0.016939333252814017
          policy_loss: 0.048910846064488096
          total_loss: 0.05274244849052694
          vf_explained_var: 0.4934958219528198
          vf_loss: 0.011327484022412035
    num_agent_steps_sampled: 708000
    num_agent_steps_trained: 708000
    num_steps_sampled: 708000
    num_steps_trained: 70

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,708,18158.4,708000,-2.6377,-1.97,-3.47,263.77


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 709000
  custom_metrics: {}
  date: 2021-11-05_17-35-12
  done: false
  episode_len_mean: 263.39
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.6338999999999873
  episode_reward_min: -3.46999999999997
  episodes_this_iter: 3
  episodes_total: 2637
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20916987721015298
          cur_lr: 5.000000000000001e-05
          entropy: 1.1284831371572284
          entropy_coeff: 0.009999999999999998
          kl: 0.014039969935497096
          policy_loss: -0.12705702061454455
          total_loss: -0.12361091433299912
          vf_explained_var: 0.5728051662445068
          vf_loss: 0.01179419896668858
    num_agent_steps_sampled: 709000
    num_agent_steps_trained: 709000
    num_steps_sampled: 709000
    num_steps_trained: 70

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,709,18181.5,709000,-2.6339,-1.97,-3.47,263.39




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 710000
  custom_metrics: {}
  date: 2021-11-05_17-35-55
  done: false
  episode_len_mean: 263.75
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.6374999999999873
  episode_reward_min: -3.46999999999997
  episodes_this_iter: 4
  episodes_total: 2641
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20916987721015298
          cur_lr: 5.000000000000001e-05
          entropy: 1.115210567580329
          entropy_coeff: 0.009999999999999998
          kl: 0.013728363833053021
          policy_loss: -0.013282050026787652
          total_loss: -0.009478774418433507
          vf_explained_var: 0.4419685900211334
          vf_loss: 0.012083820150130325
    num_agent_steps_sampled: 710000
    num_agent_steps_trained: 710000
    num_steps_sampled: 710000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,710,18224.8,710000,-2.6375,-1.97,-3.47,263.75


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 711000
  custom_metrics: {}
  date: 2021-11-05_17-36-20
  done: false
  episode_len_mean: 262.6
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.625999999999988
  episode_reward_min: -3.46999999999997
  episodes_this_iter: 4
  episodes_total: 2645
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20916987721015298
          cur_lr: 5.000000000000001e-05
          entropy: 1.145229493247138
          entropy_coeff: 0.009999999999999998
          kl: 0.01620078778428401
          policy_loss: -0.10192826290925344
          total_loss: -0.09369305724071132
          vf_explained_var: 0.353863000869751
          vf_loss: 0.01629878491577175
    num_agent_steps_sampled: 711000
    num_agent_steps_trained: 711000
    num_steps_sampled: 711000
    num_steps_trained: 711000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,711,18249.7,711000,-2.626,-1.97,-3.47,262.6


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 712000
  custom_metrics: {}
  date: 2021-11-05_17-36-44
  done: false
  episode_len_mean: 263.31
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.6330999999999873
  episode_reward_min: -3.46999999999997
  episodes_this_iter: 4
  episodes_total: 2649
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20916987721015298
          cur_lr: 5.000000000000001e-05
          entropy: 1.1057459870974222
          entropy_coeff: 0.009999999999999998
          kl: 0.005859692658066251
          policy_loss: -0.10888170227408409
          total_loss: -0.10606375121408039
          vf_explained_var: 0.4556163251399994
          vf_loss: 0.012649742420762777
    num_agent_steps_sampled: 712000
    num_agent_steps_trained: 712000
    num_steps_sampled: 712000
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,712,18273.3,712000,-2.6331,-1.97,-3.47,263.31


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 713000
  custom_metrics: {}
  date: 2021-11-05_17-37-03
  done: false
  episode_len_mean: 265.69
  episode_media: {}
  episode_reward_max: -2.2299999999999964
  episode_reward_mean: -2.656899999999987
  episode_reward_min: -4.069999999999958
  episodes_this_iter: 3
  episodes_total: 2652
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20916987721015298
          cur_lr: 5.000000000000001e-05
          entropy: 1.224630089600881
          entropy_coeff: 0.009999999999999998
          kl: 0.0156610868870522
          policy_loss: -0.008420184751351674
          total_loss: -0.00703853749566608
          vf_explained_var: 0.36307623982429504
          vf_loss: 0.010352117785563072
    num_agent_steps_sampled: 713000
    num_agent_steps_trained: 713000
    num_steps_sampled: 713000
    num_steps_trained: 71

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,713,18292.5,713000,-2.6569,-2.23,-4.07,265.69


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 714000
  custom_metrics: {}
  date: 2021-11-05_17-37-22
  done: false
  episode_len_mean: 268.43
  episode_media: {}
  episode_reward_max: -2.249999999999996
  episode_reward_mean: -2.6842999999999857
  episode_reward_min: -4.069999999999958
  episodes_this_iter: 3
  episodes_total: 2655
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20916987721015298
          cur_lr: 5.000000000000001e-05
          entropy: 1.3900150616963705
          entropy_coeff: 0.009999999999999998
          kl: 0.014757072756842227
          policy_loss: 0.04226246202985446
          total_loss: 0.040637364155716366
          vf_explained_var: 0.24609065055847168
          vf_loss: 0.009188318469872078
    num_agent_steps_sampled: 714000
    num_agent_steps_trained: 714000
    num_steps_sampled: 714000
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,714,18311.4,714000,-2.6843,-2.25,-4.07,268.43


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 715000
  custom_metrics: {}
  date: 2021-11-05_17-37-40
  done: false
  episode_len_mean: 270.59
  episode_media: {}
  episode_reward_max: -2.249999999999996
  episode_reward_mean: -2.7058999999999855
  episode_reward_min: -4.069999999999958
  episodes_this_iter: 3
  episodes_total: 2658
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20916987721015298
          cur_lr: 5.000000000000001e-05
          entropy: 1.3264981852637396
          entropy_coeff: 0.009999999999999998
          kl: 0.017926225671790755
          policy_loss: 0.04501328575942251
          total_loss: 0.045610161870718
          vf_explained_var: -0.14681842923164368
          vf_loss: 0.010112226727263382
    num_agent_steps_sampled: 715000
    num_agent_steps_trained: 715000
    num_steps_sampled: 715000
    num_steps_trained: 715

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,715,18329.9,715000,-2.7059,-2.25,-4.07,270.59


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 716000
  custom_metrics: {}
  date: 2021-11-05_17-38-04
  done: false
  episode_len_mean: 271.04
  episode_media: {}
  episode_reward_max: -2.249999999999996
  episode_reward_mean: -2.710399999999986
  episode_reward_min: -4.069999999999958
  episodes_this_iter: 3
  episodes_total: 2661
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20916987721015298
          cur_lr: 5.000000000000001e-05
          entropy: 1.2785295420222813
          entropy_coeff: 0.009999999999999998
          kl: 0.025716328433347733
          policy_loss: -0.1251883750160535
          total_loss: -0.11830438449978828
          vf_explained_var: 0.11303090304136276
          vf_loss: 0.01429020409575767
    num_agent_steps_sampled: 716000
    num_agent_steps_trained: 716000
    num_steps_sampled: 716000
    num_steps_trained: 716

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,716,18353.2,716000,-2.7104,-2.25,-4.07,271.04


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 717000
  custom_metrics: {}
  date: 2021-11-05_17-38-25
  done: false
  episode_len_mean: 273.09
  episode_media: {}
  episode_reward_max: -2.249999999999996
  episode_reward_mean: -2.7308999999999855
  episode_reward_min: -4.069999999999958
  episodes_this_iter: 4
  episodes_total: 2665
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31375481581522946
          cur_lr: 5.000000000000001e-05
          entropy: 1.5645541217592027
          entropy_coeff: 0.009999999999999998
          kl: 0.010347000968041047
          policy_loss: 0.01034523273507754
          total_loss: 0.012759274947974417
          vf_explained_var: 0.14585480093955994
          vf_loss: 0.014813159964978695
    num_agent_steps_sampled: 717000
    num_agent_steps_trained: 717000
    num_steps_sampled: 717000
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,717,18374.2,717000,-2.7309,-2.25,-4.07,273.09


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 718000
  custom_metrics: {}
  date: 2021-11-05_17-38-48
  done: false
  episode_len_mean: 273.95
  episode_media: {}
  episode_reward_max: -2.249999999999996
  episode_reward_mean: -2.739499999999985
  episode_reward_min: -4.069999999999958
  episodes_this_iter: 3
  episodes_total: 2668
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31375481581522946
          cur_lr: 5.000000000000001e-05
          entropy: 1.44431727859709
          entropy_coeff: 0.009999999999999998
          kl: 0.00989669909491041
          policy_loss: -0.013917322787973616
          total_loss: -0.014298983580536313
          vf_explained_var: -0.12782779335975647
          vf_loss: 0.010956374260907372
    num_agent_steps_sampled: 718000
    num_agent_steps_trained: 718000
    num_steps_sampled: 718000
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,718,18397.2,718000,-2.7395,-2.25,-4.07,273.95




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 719000
  custom_metrics: {}
  date: 2021-11-05_17-39-29
  done: false
  episode_len_mean: 274.39
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.743899999999986
  episode_reward_min: -4.069999999999958
  episodes_this_iter: 4
  episodes_total: 2672
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31375481581522946
          cur_lr: 5.000000000000001e-05
          entropy: 1.3569166951709324
          entropy_coeff: 0.009999999999999998
          kl: 0.008943214462354377
          policy_loss: -0.0013161914216147529
          total_loss: 0.001342109673553043
          vf_explained_var: 0.2568754553794861
          vf_loss: 0.013421491160988808
    num_agent_steps_sampled: 719000
    num_agent_steps_trained: 719000
    num_steps_sampled: 719000
    num_steps_trained: 719000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,719,18438.1,719000,-2.7439,-2.06,-4.07,274.39


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 720000
  custom_metrics: {}
  date: 2021-11-05_17-39-53
  done: false
  episode_len_mean: 274.6
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.7459999999999853
  episode_reward_min: -4.069999999999958
  episodes_this_iter: 4
  episodes_total: 2676
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31375481581522946
          cur_lr: 5.000000000000001e-05
          entropy: 1.2532931274837917
          entropy_coeff: 0.009999999999999998
          kl: 0.012172388113132766
          policy_loss: 0.01164584971136517
          total_loss: 0.01906627027524842
          vf_explained_var: -0.001401970162987709
          vf_loss: 0.01613420500523514
    num_agent_steps_sampled: 720000
    num_agent_steps_trained: 720000
    num_steps_sampled: 720000
    num_steps_trained: 720000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,720,18462.4,720000,-2.746,-2.06,-4.07,274.6


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 721000
  custom_metrics: {}
  date: 2021-11-05_17-40-16
  done: false
  episode_len_mean: 275.46
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.7545999999999844
  episode_reward_min: -4.069999999999958
  episodes_this_iter: 4
  episodes_total: 2680
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31375481581522946
          cur_lr: 5.000000000000001e-05
          entropy: 1.4308923449781206
          entropy_coeff: 0.009999999999999998
          kl: 0.011547324452369128
          policy_loss: 0.01853397736946742
          total_loss: 0.02151349104113049
          vf_explained_var: 0.1343315690755844
          vf_loss: 0.01366541063826945
    num_agent_steps_sampled: 721000
    num_agent_steps_trained: 721000
    num_steps_sampled: 721000
    num_steps_trained: 721000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,721,18485.7,721000,-2.7546,-2.06,-4.07,275.46


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 722000
  custom_metrics: {}
  date: 2021-11-05_17-40-41
  done: false
  episode_len_mean: 275.36
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.753599999999986
  episode_reward_min: -4.069999999999958
  episodes_this_iter: 4
  episodes_total: 2684
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31375481581522946
          cur_lr: 5.000000000000001e-05
          entropy: 1.386981678671307
          entropy_coeff: 0.009999999999999998
          kl: 0.009381970519172396
          policy_loss: -0.0031877794199519686
          total_loss: 0.0007355332374572754
          vf_explained_var: 0.18948107957839966
          vf_loss: 0.014849490413649215
    num_agent_steps_sampled: 722000
    num_agent_steps_trained: 722000
    num_steps_sampled: 722000
    num_steps_trained: 722000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,722,18510.2,722000,-2.7536,-2.06,-4.07,275.36


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 723000
  custom_metrics: {}
  date: 2021-11-05_17-41-05
  done: false
  episode_len_mean: 275.57
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.755699999999986
  episode_reward_min: -4.069999999999958
  episodes_this_iter: 3
  episodes_total: 2687
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31375481581522946
          cur_lr: 5.000000000000001e-05
          entropy: 1.361673492193222
          entropy_coeff: 0.009999999999999998
          kl: 0.009559231112535668
          policy_loss: -0.08936244812276628
          total_loss: -0.08723589984907044
          vf_explained_var: 0.31321388483047485
          vf_loss: 0.01274402445803086
    num_agent_steps_sampled: 723000
    num_agent_steps_trained: 723000
    num_steps_sampled: 723000
    num_steps_trained: 723000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,723,18534.2,723000,-2.7557,-2.06,-4.07,275.57


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 724000
  custom_metrics: {}
  date: 2021-11-05_17-41-29
  done: false
  episode_len_mean: 275.24
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.7523999999999855
  episode_reward_min: -4.069999999999958
  episodes_this_iter: 4
  episodes_total: 2691
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31375481581522946
          cur_lr: 5.000000000000001e-05
          entropy: 1.436227896478441
          entropy_coeff: 0.009999999999999998
          kl: 0.005529496302695473
          policy_loss: -0.016523029324081208
          total_loss: -0.01538658200038804
          vf_explained_var: 0.27511295676231384
          vf_loss: 0.01376381946934594
    num_agent_steps_sampled: 724000
    num_agent_steps_trained: 724000
    num_steps_sampled: 724000
    num_steps_trained: 724000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,724,18558.4,724000,-2.7524,-2.06,-4.07,275.24


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 725000
  custom_metrics: {}
  date: 2021-11-05_17-41-52
  done: false
  episode_len_mean: 275.96
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.759599999999985
  episode_reward_min: -4.069999999999958
  episodes_this_iter: 4
  episodes_total: 2695
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31375481581522946
          cur_lr: 5.000000000000001e-05
          entropy: 1.7263080305523342
          entropy_coeff: 0.009999999999999998
          kl: 0.012175184134359855
          policy_loss: 0.002882777320014106
          total_loss: 0.0014264966050783793
          vf_explained_var: 0.3081212639808655
          vf_loss: 0.011986774847739273
    num_agent_steps_sampled: 725000
    num_agent_steps_trained: 725000
    num_steps_sampled: 725000
    num_steps_trained: 725000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,725,18581.3,725000,-2.7596,-2.06,-4.07,275.96


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 726000
  custom_metrics: {}
  date: 2021-11-05_17-42-16
  done: false
  episode_len_mean: 275.78
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.757799999999985
  episode_reward_min: -4.069999999999958
  episodes_this_iter: 4
  episodes_total: 2699
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31375481581522946
          cur_lr: 5.000000000000001e-05
          entropy: 1.699546093410916
          entropy_coeff: 0.009999999999999998
          kl: 0.013827263746076648
          policy_loss: 0.008208249426550336
          total_loss: 0.008674723572201198
          vf_explained_var: 0.19289745390415192
          vf_loss: 0.013123562145564292
    num_agent_steps_sampled: 726000
    num_agent_steps_trained: 726000
    num_steps_sampled: 726000
    num_steps_trained: 726000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,726,18605.1,726000,-2.7578,-2.06,-4.07,275.78




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 727000
  custom_metrics: {}
  date: 2021-11-05_17-42-58
  done: false
  episode_len_mean: 275.78
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.757799999999985
  episode_reward_min: -4.069999999999958
  episodes_this_iter: 3
  episodes_total: 2702
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31375481581522946
          cur_lr: 5.000000000000001e-05
          entropy: 1.6716951767603556
          entropy_coeff: 0.009999999999999998
          kl: 0.011956675461889037
          policy_loss: 0.01361576302183999
          total_loss: 0.009072135637203852
          vf_explained_var: 0.38756489753723145
          vf_loss: 0.008421858105834366
    num_agent_steps_sampled: 727000
    num_agent_steps_trained: 727000
    num_steps_sampled: 727000
    num_steps_trained: 727000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,727,18646.6,727000,-2.7578,-2.06,-4.07,275.78


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 728000
  custom_metrics: {}
  date: 2021-11-05_17-43-22
  done: false
  episode_len_mean: 274.04
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.7403999999999855
  episode_reward_min: -4.069999999999958
  episodes_this_iter: 4
  episodes_total: 2706
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31375481581522946
          cur_lr: 5.000000000000001e-05
          entropy: 1.5760754121674432
          entropy_coeff: 0.009999999999999998
          kl: 0.010745772744631508
          policy_loss: -0.006301472211877505
          total_loss: -0.00875182023478879
          vf_explained_var: 0.4519611895084381
          vf_loss: 0.009938865325724085
    num_agent_steps_sampled: 728000
    num_agent_steps_trained: 728000
    num_steps_sampled: 728000
    num_steps_trained: 728000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,728,18670.8,728000,-2.7404,-2.06,-4.07,274.04


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 729000
  custom_metrics: {}
  date: 2021-11-05_17-43-45
  done: false
  episode_len_mean: 273.97
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.7396999999999845
  episode_reward_min: -4.069999999999958
  episodes_this_iter: 4
  episodes_total: 2710
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31375481581522946
          cur_lr: 5.000000000000001e-05
          entropy: 1.4783777832984923
          entropy_coeff: 0.009999999999999998
          kl: 0.012407091706091933
          policy_loss: -0.014541059401300219
          total_loss: -0.012897447496652604
          vf_explained_var: 0.3956372141838074
          vf_loss: 0.012534600532510215
    num_agent_steps_sampled: 729000
    num_agent_steps_trained: 729000
    num_steps_sampled: 729000
    num_steps_trained: 729000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,729,18694.1,729000,-2.7397,-2.06,-4.07,273.97


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 730000
  custom_metrics: {}
  date: 2021-11-05_17-44-09
  done: false
  episode_len_mean: 273.17
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.7316999999999854
  episode_reward_min: -4.069999999999958
  episodes_this_iter: 4
  episodes_total: 2714
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31375481581522946
          cur_lr: 5.000000000000001e-05
          entropy: 1.42398458454344
          entropy_coeff: 0.009999999999999998
          kl: 0.008091951007757971
          policy_loss: -0.06031543579366472
          total_loss: -0.06102396539515919
          vf_explained_var: 0.3652186393737793
          vf_loss: 0.010992428903571433
    num_agent_steps_sampled: 730000
    num_agent_steps_trained: 730000
    num_steps_sampled: 730000
    num_steps_trained: 730000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,730,18717.9,730000,-2.7317,-2.06,-4.07,273.17


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 731000
  custom_metrics: {}
  date: 2021-11-05_17-44-34
  done: false
  episode_len_mean: 272.37
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.7236999999999862
  episode_reward_min: -4.069999999999958
  episodes_this_iter: 3
  episodes_total: 2717
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31375481581522946
          cur_lr: 5.000000000000001e-05
          entropy: 1.4283249894777934
          entropy_coeff: 0.009999999999999998
          kl: 0.00991789404119202
          policy_loss: -0.14141755203406017
          total_loss: -0.1419100353287326
          vf_explained_var: 0.31333091855049133
          vf_loss: 0.010678975579018394
    num_agent_steps_sampled: 731000
    num_agent_steps_trained: 731000
    num_steps_sampled: 731000
    num_steps_trained: 731000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,731,18742.6,731000,-2.7237,-2.06,-4.07,272.37


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 732000
  custom_metrics: {}
  date: 2021-11-05_17-44-57
  done: false
  episode_len_mean: 272.2
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.7219999999999858
  episode_reward_min: -4.069999999999958
  episodes_this_iter: 4
  episodes_total: 2721
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31375481581522946
          cur_lr: 5.000000000000001e-05
          entropy: 1.3710910174581739
          entropy_coeff: 0.009999999999999998
          kl: 0.00679347448097379
          policy_loss: 0.0015163975457350412
          total_loss: -0.000988096164332496
          vf_explained_var: 0.3428177237510681
          vf_loss: 0.0090749298946725
    num_agent_steps_sampled: 732000
    num_agent_steps_trained: 732000
    num_steps_sampled: 732000
    num_steps_trained: 732000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,732,18765.8,732000,-2.722,-2.06,-4.07,272.2


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 733000
  custom_metrics: {}
  date: 2021-11-05_17-45-22
  done: false
  episode_len_mean: 272.12
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.7211999999999854
  episode_reward_min: -4.069999999999958
  episodes_this_iter: 4
  episodes_total: 2725
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31375481581522946
          cur_lr: 5.000000000000001e-05
          entropy: 1.2892923831939698
          entropy_coeff: 0.009999999999999998
          kl: 0.017859127252317662
          policy_loss: 0.024770123925473956
          total_loss: 0.026983122693167792
          vf_explained_var: 0.21626171469688416
          vf_loss: 0.009502533212718036
    num_agent_steps_sampled: 733000
    num_agent_steps_trained: 733000
    num_steps_sampled: 733000
    num_steps_trained: 733000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,733,18790.5,733000,-2.7212,-2.06,-4.07,272.12


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 734000
  custom_metrics: {}
  date: 2021-11-05_17-45-45
  done: false
  episode_len_mean: 272.99
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.729899999999985
  episode_reward_min: -4.069999999999958
  episodes_this_iter: 4
  episodes_total: 2729
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31375481581522946
          cur_lr: 5.000000000000001e-05
          entropy: 1.1652636567751566
          entropy_coeff: 0.009999999999999998
          kl: 0.0047120803551164965
          policy_loss: -0.0007153676615820991
          total_loss: -0.001159465230173535
          vf_explained_var: 0.25675827264785767
          vf_loss: 0.009730105557375484
    num_agent_steps_sampled: 734000
    num_agent_steps_trained: 734000
    num_steps_sampled: 734000
    num_steps_trained: 734000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,734,18813.9,734000,-2.7299,-2.06,-4.07,272.99




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 735000
  custom_metrics: {}
  date: 2021-11-05_17-46-27
  done: false
  episode_len_mean: 273.28
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.7327999999999855
  episode_reward_min: -4.069999999999958
  episodes_this_iter: 4
  episodes_total: 2733
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15687740790761473
          cur_lr: 5.000000000000001e-05
          entropy: 1.2246408290333217
          entropy_coeff: 0.009999999999999998
          kl: 0.021454844166983695
          policy_loss: -0.00685109943151474
          total_loss: -0.005415497057967716
          vf_explained_var: 0.2758481502532959
          vf_loss: 0.010316229828943809
    num_agent_steps_sampled: 735000
    num_agent_steps_trained: 735000
    num_steps_sampled: 735000
    num_steps_trained: 735000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,735,18856.1,735000,-2.7328,-2.06,-4.07,273.28


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 736000
  custom_metrics: {}
  date: 2021-11-05_17-46-52
  done: false
  episode_len_mean: 273.39
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.733899999999985
  episode_reward_min: -4.069999999999958
  episodes_this_iter: 3
  episodes_total: 2736
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23531611186142204
          cur_lr: 5.000000000000001e-05
          entropy: 1.09138011402554
          entropy_coeff: 0.009999999999999998
          kl: 0.008558478292555172
          policy_loss: -0.09621593240234587
          total_loss: -0.09431401044130325
          vf_explained_var: 0.26017141342163086
          vf_loss: 0.010801775867326393
    num_agent_steps_sampled: 736000
    num_agent_steps_trained: 736000
    num_steps_sampled: 736000
    num_steps_trained: 736000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,736,18880.8,736000,-2.7339,-2.06,-4.07,273.39


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 737000
  custom_metrics: {}
  date: 2021-11-05_17-47-15
  done: false
  episode_len_mean: 273.2
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.731999999999985
  episode_reward_min: -4.069999999999958
  episodes_this_iter: 4
  episodes_total: 2740
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23531611186142204
          cur_lr: 5.000000000000001e-05
          entropy: 0.8812698582808177
          entropy_coeff: 0.009999999999999998
          kl: 0.00607676736806929
          policy_loss: 0.0475921791460779
          total_loss: 0.051613585982057786
          vf_explained_var: 0.15354116261005402
          vf_loss: 0.011404143687751557
    num_agent_steps_sampled: 737000
    num_agent_steps_trained: 737000
    num_steps_sampled: 737000
    num_steps_trained: 737000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,737,18904,737000,-2.732,-2.06,-4.07,273.2


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 738000
  custom_metrics: {}
  date: 2021-11-05_17-47-39
  done: false
  episode_len_mean: 273.47
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.734699999999986
  episode_reward_min: -4.069999999999958
  episodes_this_iter: 4
  episodes_total: 2744
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23531611186142204
          cur_lr: 5.000000000000001e-05
          entropy: 1.019461053609848
          entropy_coeff: 0.009999999999999998
          kl: 0.003899569434154336
          policy_loss: 0.02591734503706296
          total_loss: 0.028282596667607626
          vf_explained_var: 0.18618494272232056
          vf_loss: 0.011642228480842379
    num_agent_steps_sampled: 738000
    num_agent_steps_trained: 738000
    num_steps_sampled: 738000
    num_steps_trained: 738000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,738,18928.2,738000,-2.7347,-2.06,-4.07,273.47


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 739000
  custom_metrics: {}
  date: 2021-11-05_17-48-05
  done: false
  episode_len_mean: 273.45
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.7344999999999855
  episode_reward_min: -4.069999999999958
  episodes_this_iter: 4
  episodes_total: 2748
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11765805593071102
          cur_lr: 5.000000000000001e-05
          entropy: 1.0148804048697153
          entropy_coeff: 0.009999999999999998
          kl: 0.005622916833460875
          policy_loss: -0.016701472881767484
          total_loss: -0.013920516106817457
          vf_explained_var: 0.16235139966011047
          vf_loss: 0.01226818123832345
    num_agent_steps_sampled: 739000
    num_agent_steps_trained: 739000
    num_steps_sampled: 739000
    num_steps_trained: 739000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,739,18953.4,739000,-2.7345,-2.06,-4.07,273.45


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 740000
  custom_metrics: {}
  date: 2021-11-05_17-48-30
  done: false
  episode_len_mean: 270.39
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.703899999999986
  episode_reward_min: -3.749999999999964
  episodes_this_iter: 4
  episodes_total: 2752
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11765805593071102
          cur_lr: 5.000000000000001e-05
          entropy: 1.0044200115733677
          entropy_coeff: 0.009999999999999998
          kl: 0.005803554828565578
          policy_loss: 0.02473169341683388
          total_loss: 0.027383374919493993
          vf_explained_var: 0.21537534892559052
          vf_loss: 0.012013045222394996
    num_agent_steps_sampled: 740000
    num_agent_steps_trained: 740000
    num_steps_sampled: 740000
    num_steps_trained: 740000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,740,18978.6,740000,-2.7039,-2.06,-3.75,270.39


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 741000
  custom_metrics: {}
  date: 2021-11-05_17-48-54
  done: false
  episode_len_mean: 266.43
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.664299999999987
  episode_reward_min: -3.4399999999999706
  episodes_this_iter: 4
  episodes_total: 2756
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11765805593071102
          cur_lr: 5.000000000000001e-05
          entropy: 1.0812406751844619
          entropy_coeff: 0.009999999999999998
          kl: 0.008068004084568751
          policy_loss: 0.033478000428941515
          total_loss: 0.038013771176338196
          vf_explained_var: 0.0666121393442154
          vf_loss: 0.014398910757154226
    num_agent_steps_sampled: 741000
    num_agent_steps_trained: 741000
    num_steps_sampled: 741000
    num_steps_trained: 741000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,741,19002.4,741000,-2.6643,-2.06,-3.44,266.43




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 742000
  custom_metrics: {}
  date: 2021-11-05_17-49-34
  done: false
  episode_len_mean: 265.03
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.6502999999999877
  episode_reward_min: -3.3599999999999723
  episodes_this_iter: 4
  episodes_total: 2760
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11765805593071102
          cur_lr: 5.000000000000001e-05
          entropy: 1.1349831448660956
          entropy_coeff: 0.009999999999999998
          kl: 0.007451868795764464
          policy_loss: 0.017375250822967953
          total_loss: 0.02067581183380551
          vf_explained_var: 0.15842963755130768
          vf_loss: 0.013773621960232655
    num_agent_steps_sampled: 742000
    num_agent_steps_trained: 742000
    num_steps_sampled: 742000
    num_steps_trained: 742000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,742,19042.4,742000,-2.6503,-2.06,-3.36,265.03


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 743000
  custom_metrics: {}
  date: 2021-11-05_17-49-58
  done: false
  episode_len_mean: 264.49
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.6448999999999874
  episode_reward_min: -3.3599999999999723
  episodes_this_iter: 3
  episodes_total: 2763
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11765805593071102
          cur_lr: 5.000000000000001e-05
          entropy: 1.1383268694082895
          entropy_coeff: 0.009999999999999998
          kl: 0.011526445318024988
          policy_loss: -0.0357333168387413
          total_loss: -0.03302020852764447
          vf_explained_var: 0.1394585818052292
          vf_loss: 0.012740196101367474
    num_agent_steps_sampled: 743000
    num_agent_steps_trained: 743000
    num_steps_sampled: 743000
    num_steps_trained: 743000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,743,19066.5,743000,-2.6449,-2.06,-3.36,264.49


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 744000
  custom_metrics: {}
  date: 2021-11-05_17-50-20
  done: false
  episode_len_mean: 263.56
  episode_media: {}
  episode_reward_max: -2.06
  episode_reward_mean: -2.6355999999999877
  episode_reward_min: -3.419999999999971
  episodes_this_iter: 4
  episodes_total: 2767
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11765805593071102
          cur_lr: 5.000000000000001e-05
          entropy: 1.1201847745312585
          entropy_coeff: 0.009999999999999998
          kl: 0.03785085929415462
          policy_loss: 0.011459523273838892
          total_loss: 0.022037883351246516
          vf_explained_var: 0.03690113499760628
          vf_loss: 0.01732674901270204
    num_agent_steps_sampled: 744000
    num_agent_steps_trained: 744000
    num_steps_sampled: 744000
    num_steps_trained: 744000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,744,19088.7,744000,-2.6356,-2.06,-3.42,263.56


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 745000
  custom_metrics: {}
  date: 2021-11-05_17-50-44
  done: false
  episode_len_mean: 263.83
  episode_media: {}
  episode_reward_max: -2.3099999999999947
  episode_reward_mean: -2.638299999999987
  episode_reward_min: -3.419999999999971
  episodes_this_iter: 4
  episodes_total: 2771
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17648708389606657
          cur_lr: 5.000000000000001e-05
          entropy: 1.2263742314444648
          entropy_coeff: 0.009999999999999998
          kl: 0.021072959948617152
          policy_loss: -0.006133627146482468
          total_loss: -0.002695406393872367
          vf_explained_var: 0.41182219982147217
          vf_loss: 0.011982856163134178
    num_agent_steps_sampled: 745000
    num_agent_steps_trained: 745000
    num_steps_sampled: 745000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,745,19112.4,745000,-2.6383,-2.31,-3.42,263.83


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 746000
  custom_metrics: {}
  date: 2021-11-05_17-51-06
  done: false
  episode_len_mean: 263.59
  episode_media: {}
  episode_reward_max: -2.3099999999999947
  episode_reward_mean: -2.635899999999987
  episode_reward_min: -3.419999999999971
  episodes_this_iter: 3
  episodes_total: 2774
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2647306258440998
          cur_lr: 5.000000000000001e-05
          entropy: 1.302030372619629
          entropy_coeff: 0.009999999999999998
          kl: 0.007211783305552129
          policy_loss: -0.07386935990717676
          total_loss: -0.07082441730631722
          vf_explained_var: 0.1496596485376358
          vf_loss: 0.014156064649836885
    num_agent_steps_sampled: 746000
    num_agent_steps_trained: 746000
    num_steps_sampled: 746000
    num_steps_trained: 746

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,746,19134.2,746000,-2.6359,-2.31,-3.42,263.59


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 747000
  custom_metrics: {}
  date: 2021-11-05_17-51-25
  done: false
  episode_len_mean: 267.54
  episode_media: {}
  episode_reward_max: -2.3099999999999947
  episode_reward_mean: -2.6753999999999865
  episode_reward_min: -4.679999999999945
  episodes_this_iter: 3
  episodes_total: 2777
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2647306258440998
          cur_lr: 5.000000000000001e-05
          entropy: 1.3277321908209059
          entropy_coeff: 0.009999999999999998
          kl: 0.01654627811505998
          policy_loss: 0.030143684479925367
          total_loss: 0.032441174321704444
          vf_explained_var: 0.22947338223457336
          vf_loss: 0.011194504803926166
    num_agent_steps_sampled: 747000
    num_agent_steps_trained: 747000
    num_steps_sampled: 747000
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,747,19153.1,747000,-2.6754,-2.31,-4.68,267.54


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 748000
  custom_metrics: {}
  date: 2021-11-05_17-51-46
  done: false
  episode_len_mean: 269.03
  episode_media: {}
  episode_reward_max: -2.3099999999999947
  episode_reward_mean: -2.690299999999987
  episode_reward_min: -4.679999999999945
  episodes_this_iter: 3
  episodes_total: 2780
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2647306258440998
          cur_lr: 5.000000000000001e-05
          entropy: 1.3365180545383029
          entropy_coeff: 0.009999999999999998
          kl: 0.015984663199381525
          policy_loss: 0.0027865424752235414
          total_loss: 0.005143530501259698
          vf_explained_var: -0.008535409346222878
          vf_loss: 0.011490537259831197
    num_agent_steps_sampled: 748000
    num_agent_steps_trained: 748000
    num_steps_sampled: 748000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,748,19174.9,748000,-2.6903,-2.31,-4.68,269.03


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 749000
  custom_metrics: {}
  date: 2021-11-05_17-52-10
  done: false
  episode_len_mean: 270.23
  episode_media: {}
  episode_reward_max: -2.3099999999999947
  episode_reward_mean: -2.702299999999986
  episode_reward_min: -4.679999999999945
  episodes_this_iter: 3
  episodes_total: 2783
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2647306258440998
          cur_lr: 5.000000000000001e-05
          entropy: 1.1620910776986015
          entropy_coeff: 0.009999999999999998
          kl: 0.016551477151323567
          policy_loss: -0.12053156975242826
          total_loss: -0.11405200825797188
          vf_explained_var: 0.2753801643848419
          vf_loss: 0.01371879255813029
    num_agent_steps_sampled: 749000
    num_agent_steps_trained: 749000
    num_steps_sampled: 749000
    num_steps_trained: 749

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,749,19198.6,749000,-2.7023,-2.31,-4.68,270.23


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 750000
  custom_metrics: {}
  date: 2021-11-05_17-52-33
  done: false
  episode_len_mean: 271.08
  episode_media: {}
  episode_reward_max: -2.3099999999999947
  episode_reward_mean: -2.710799999999986
  episode_reward_min: -4.679999999999945
  episodes_this_iter: 4
  episodes_total: 2787
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2647306258440998
          cur_lr: 5.000000000000001e-05
          entropy: 1.1485677652888828
          entropy_coeff: 0.009999999999999998
          kl: 0.008207818596145148
          policy_loss: 0.01828180758489503
          total_loss: 0.022099132504728107
          vf_explained_var: 0.2396399974822998
          vf_loss: 0.013130142218950721
    num_agent_steps_sampled: 750000
    num_agent_steps_trained: 750000
    num_steps_sampled: 750000
    num_steps_trained: 750

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,750,19221.3,750000,-2.7108,-2.31,-4.68,271.08




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 751000
  custom_metrics: {}
  date: 2021-11-05_17-53-17
  done: false
  episode_len_mean: 270.82
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.7081999999999855
  episode_reward_min: -4.679999999999945
  episodes_this_iter: 4
  episodes_total: 2791
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2647306258440998
          cur_lr: 5.000000000000001e-05
          entropy: 1.1818149699105156
          entropy_coeff: 0.009999999999999998
          kl: 0.003986882398917955
          policy_loss: 0.020423877156443067
          total_loss: 0.02182210377520985
          vf_explained_var: 0.3211287260055542
          vf_loss: 0.012160924615131484
    num_agent_steps_sampled: 751000
    num_agent_steps_trained: 751000
    num_steps_sampled: 751000
    num_steps_trained: 751

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,751,19265,751000,-2.7082,-2.21,-4.68,270.82


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 752000
  custom_metrics: {}
  date: 2021-11-05_17-53-41
  done: false
  episode_len_mean: 269.89
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.6988999999999868
  episode_reward_min: -4.679999999999945
  episodes_this_iter: 4
  episodes_total: 2795
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1323653129220499
          cur_lr: 5.000000000000001e-05
          entropy: 1.1225804289182026
          entropy_coeff: 0.009999999999999998
          kl: 0.01448259759827045
          policy_loss: 0.02540078411499659
          total_loss: 0.031018757820129396
          vf_explained_var: 0.1615300476551056
          vf_loss: 0.014926781826135185
    num_agent_steps_sampled: 752000
    num_agent_steps_trained: 752000
    num_steps_sampled: 752000
    num_steps_trained: 7520

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,752,19289.7,752000,-2.6989,-2.21,-4.68,269.89


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 753000
  custom_metrics: {}
  date: 2021-11-05_17-54-05
  done: false
  episode_len_mean: 269.96
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.699599999999987
  episode_reward_min: -4.679999999999945
  episodes_this_iter: 4
  episodes_total: 2799
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1323653129220499
          cur_lr: 5.000000000000001e-05
          entropy: 1.0916950444380442
          entropy_coeff: 0.009999999999999998
          kl: 0.00903738046479761
          policy_loss: -0.011300242609447904
          total_loss: -0.005793648627069261
          vf_explained_var: 0.2182721197605133
          vf_loss: 0.015227309159106679
    num_agent_steps_sampled: 753000
    num_agent_steps_trained: 753000
    num_steps_sampled: 753000
    num_steps_trained: 75

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,753,19313.4,753000,-2.6996,-2.21,-4.68,269.96


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 754000
  custom_metrics: {}
  date: 2021-11-05_17-54-30
  done: false
  episode_len_mean: 269.52
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.6951999999999856
  episode_reward_min: -4.679999999999945
  episodes_this_iter: 3
  episodes_total: 2802
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1323653129220499
          cur_lr: 5.000000000000001e-05
          entropy: 1.0286917534139421
          entropy_coeff: 0.009999999999999998
          kl: 0.008575342342904888
          policy_loss: -0.15094229959779315
          total_loss: -0.1467509400513437
          vf_explained_var: 0.26586583256721497
          vf_loss: 0.013343201981236537
    num_agent_steps_sampled: 754000
    num_agent_steps_trained: 754000
    num_steps_sampled: 754000
    num_steps_trained: 75

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,754,19338.2,754000,-2.6952,-2.21,-4.68,269.52


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 755000
  custom_metrics: {}
  date: 2021-11-05_17-54-55
  done: false
  episode_len_mean: 269.41
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.694099999999986
  episode_reward_min: -4.679999999999945
  episodes_this_iter: 4
  episodes_total: 2806
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1323653129220499
          cur_lr: 5.000000000000001e-05
          entropy: 0.9981943355666266
          entropy_coeff: 0.009999999999999998
          kl: 0.006737210515099434
          policy_loss: -0.05290748584601614
          total_loss: -0.04920774267779456
          vf_explained_var: 0.24626405537128448
          vf_loss: 0.012789915553811523
    num_agent_steps_sampled: 755000
    num_agent_steps_trained: 755000
    num_steps_sampled: 755000
    num_steps_trained: 75

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,755,19363.2,755000,-2.6941,-2.21,-4.68,269.41


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 756000
  custom_metrics: {}
  date: 2021-11-05_17-55-20
  done: false
  episode_len_mean: 268.94
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.689399999999986
  episode_reward_min: -4.679999999999945
  episodes_this_iter: 4
  episodes_total: 2810
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1323653129220499
          cur_lr: 5.000000000000001e-05
          entropy: 1.0671120292610592
          entropy_coeff: 0.009999999999999998
          kl: 0.029890393141536166
          policy_loss: -0.026066263516743977
          total_loss: -0.019216103934579426
          vf_explained_var: 0.09522388130426407
          vf_loss: 0.013564827707078722
    num_agent_steps_sampled: 756000
    num_agent_steps_trained: 756000
    num_steps_sampled: 756000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,756,19388.1,756000,-2.6894,-2.21,-4.68,268.94


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 757000
  custom_metrics: {}
  date: 2021-11-05_17-55-36
  done: false
  episode_len_mean: 271.94
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.7193999999999856
  episode_reward_min: -5.239999999999933
  episodes_this_iter: 3
  episodes_total: 2813
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19854796938307492
          cur_lr: 5.000000000000001e-05
          entropy: 1.2083249237802294
          entropy_coeff: 0.009999999999999998
          kl: 0.014783571613359846
          policy_loss: -0.023576155139340294
          total_loss: -0.02284855478339725
          vf_explained_var: 0.22951482236385345
          vf_loss: 0.009875603154715565
    num_agent_steps_sampled: 757000
    num_agent_steps_trained: 757000
    num_steps_sampled: 757000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,757,19404.6,757000,-2.7194,-2.21,-5.24,271.94


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 758000
  custom_metrics: {}
  date: 2021-11-05_17-56-00
  done: false
  episode_len_mean: 272.96
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.7295999999999854
  episode_reward_min: -5.239999999999933
  episodes_this_iter: 3
  episodes_total: 2816
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19854796938307492
          cur_lr: 5.000000000000001e-05
          entropy: 1.0783088776800367
          entropy_coeff: 0.009999999999999998
          kl: 0.012264953673139366
          policy_loss: 0.00013231924838489955
          total_loss: 0.0010498392085234324
          vf_explained_var: 0.37529000639915466
          vf_loss: 0.009265429609351688
    num_agent_steps_sampled: 758000
    num_agent_steps_trained: 758000
    num_steps_sampled: 758000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,758,19428.3,758000,-2.7296,-2.21,-5.24,272.96




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 759000
  custom_metrics: {}
  date: 2021-11-05_17-56-42
  done: false
  episode_len_mean: 272.0
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.7199999999999864
  episode_reward_min: -5.239999999999933
  episodes_this_iter: 4
  episodes_total: 2820
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19854796938307492
          cur_lr: 5.000000000000001e-05
          entropy: 1.0342700865533616
          entropy_coeff: 0.009999999999999998
          kl: 0.005689166937860237
          policy_loss: -0.07863957650131649
          total_loss: -0.07187214742104213
          vf_explained_var: 0.11186050623655319
          vf_loss: 0.015980560198012327
    num_agent_steps_sampled: 759000
    num_agent_steps_trained: 759000
    num_steps_sampled: 759000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,759,19470.1,759000,-2.72,-2.17,-5.24,272


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 760000
  custom_metrics: {}
  date: 2021-11-05_17-57-04
  done: false
  episode_len_mean: 272.8
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.7279999999999855
  episode_reward_min: -5.239999999999933
  episodes_this_iter: 4
  episodes_total: 2824
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19854796938307492
          cur_lr: 5.000000000000001e-05
          entropy: 1.0099024415016173
          entropy_coeff: 0.009999999999999998
          kl: 0.006329431528691584
          policy_loss: 0.03087894254260593
          total_loss: 0.03380620032548905
          vf_explained_var: 0.2591470181941986
          vf_loss: 0.011769583645380206
    num_agent_steps_sampled: 760000
    num_agent_steps_trained: 760000
    num_steps_sampled: 760000
    num_steps_trained: 760

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,760,19492.1,760000,-2.728,-2.17,-5.24,272.8


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 761000
  custom_metrics: {}
  date: 2021-11-05_17-57-30
  done: false
  episode_len_mean: 272.75
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.727499999999986
  episode_reward_min: -5.239999999999933
  episodes_this_iter: 4
  episodes_total: 2828
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19854796938307492
          cur_lr: 5.000000000000001e-05
          entropy: 0.9970729192097981
          entropy_coeff: 0.009999999999999998
          kl: 0.007678671482681112
          policy_loss: 0.021540808098183737
          total_loss: 0.02556194563706716
          vf_explained_var: 0.16248981654644012
          vf_loss: 0.012467282337860928
    num_agent_steps_sampled: 761000
    num_agent_steps_trained: 761000
    num_steps_sampled: 761000
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,761,19517.7,761000,-2.7275,-2.17,-5.24,272.75


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 762000
  custom_metrics: {}
  date: 2021-11-05_17-57-56
  done: false
  episode_len_mean: 272.11
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.721099999999986
  episode_reward_min: -5.239999999999933
  episodes_this_iter: 4
  episodes_total: 2832
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19854796938307492
          cur_lr: 5.000000000000001e-05
          entropy: 0.9003549794356028
          entropy_coeff: 0.009999999999999998
          kl: 0.006024208994327928
          policy_loss: 0.03121859571999974
          total_loss: 0.03586609148316913
          vf_explained_var: 0.156550332903862
          vf_loss: 0.012454952713516024
    num_agent_steps_sampled: 762000
    num_agent_steps_trained: 762000
    num_steps_sampled: 762000
    num_steps_trained: 7620

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,762,19543.6,762000,-2.7211,-2.17,-5.24,272.11


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 763000
  custom_metrics: {}
  date: 2021-11-05_17-58-21
  done: false
  episode_len_mean: 271.48
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.7147999999999852
  episode_reward_min: -5.239999999999933
  episodes_this_iter: 4
  episodes_total: 2836
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19854796938307492
          cur_lr: 5.000000000000001e-05
          entropy: 1.047372614675098
          entropy_coeff: 0.009999999999999998
          kl: 0.004479392293498958
          policy_loss: 0.027346488171153597
          total_loss: 0.03038258287641737
          vf_explained_var: 0.14637932181358337
          vf_loss: 0.012620443509270747
    num_agent_steps_sampled: 763000
    num_agent_steps_trained: 763000
    num_steps_sampled: 763000
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,763,19569.1,763000,-2.7148,-2.17,-5.24,271.48


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 764000
  custom_metrics: {}
  date: 2021-11-05_17-58-39
  done: false
  episode_len_mean: 274.28
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.742799999999985
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 2839
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09927398469153746
          cur_lr: 5.000000000000001e-05
          entropy: 1.3259684801101685
          entropy_coeff: 0.009999999999999998
          kl: 0.02999202471820889
          policy_loss: 0.0389547316564454
          total_loss: 0.039318072299162544
          vf_explained_var: 0.21717476844787598
          vf_loss: 0.010645598946656619
    num_agent_steps_sampled: 764000
    num_agent_steps_trained: 764000
    num_steps_sampled: 764000
    num_steps_trained: 764

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,764,19587.4,764000,-2.7428,-2.17,-5.42,274.28


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 765000
  custom_metrics: {}
  date: 2021-11-05_17-58-57
  done: false
  episode_len_mean: 277.48
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.774799999999985
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 2
  episodes_total: 2841
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14891097703730616
          cur_lr: 5.000000000000001e-05
          entropy: 1.4454329755571154
          entropy_coeff: 0.009999999999999998
          kl: 0.021585199745745912
          policy_loss: 0.06398050917519463
          total_loss: 0.0577272300918897
          vf_explained_var: -0.3349185287952423
          vf_loss: 0.004986774303122527
    num_agent_steps_sampled: 765000
    num_agent_steps_trained: 765000
    num_steps_sampled: 765000
    num_steps_trained: 765

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,765,19604.6,765000,-2.7748,-2.17,-5.42,277.48


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 766000
  custom_metrics: {}
  date: 2021-11-05_17-59-20
  done: false
  episode_len_mean: 277.31
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.773099999999985
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 2845
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2233664655559592
          cur_lr: 5.000000000000001e-05
          entropy: 1.0958073245154487
          entropy_coeff: 0.009999999999999998
          kl: 0.011214377088133333
          policy_loss: 0.04941248405310843
          total_loss: 0.04958924303452174
          vf_explained_var: 0.18400467932224274
          vf_loss: 0.008629915796013342
    num_agent_steps_sampled: 766000
    num_agent_steps_trained: 766000
    num_steps_sampled: 766000
    num_steps_trained: 766

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,766,19628.4,766000,-2.7731,-2.17,-5.42,277.31


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 767000
  custom_metrics: {}
  date: 2021-11-05_17-59-43
  done: false
  episode_len_mean: 278.87
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.7886999999999853
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 2848
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2233664655559592
          cur_lr: 5.000000000000001e-05
          entropy: 1.4262282146347893
          entropy_coeff: 0.009999999999999998
          kl: 0.008221577574272073
          policy_loss: -0.10973883213268386
          total_loss: -0.10773402800162633
          vf_explained_var: 0.047056108713150024
          vf_loss: 0.014430663527713881
    num_agent_steps_sampled: 767000
    num_agent_steps_trained: 767000
    num_steps_sampled: 767000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,767,19651.3,767000,-2.7887,-2.17,-5.42,278.87




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 768000
  custom_metrics: {}
  date: 2021-11-05_18-00-22
  done: false
  episode_len_mean: 280.84
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.8083999999999842
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 2852
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2233664655559592
          cur_lr: 5.000000000000001e-05
          entropy: 1.489172226852841
          entropy_coeff: 0.009999999999999998
          kl: 0.009317512170218438
          policy_loss: 0.01169557695587476
          total_loss: 0.01191050617231263
          vf_explained_var: 0.24007560312747955
          vf_loss: 0.013025428178823656
    num_agent_steps_sampled: 768000
    num_agent_steps_trained: 768000
    num_steps_sampled: 768000
    num_steps_trained: 768

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,768,19690.3,768000,-2.8084,-2.17,-5.42,280.84


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 769000
  custom_metrics: {}
  date: 2021-11-05_18-00-46
  done: false
  episode_len_mean: 281.9
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.818999999999983
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 2855
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2233664655559592
          cur_lr: 5.000000000000001e-05
          entropy: 1.2923789660135905
          entropy_coeff: 0.009999999999999998
          kl: 0.008405755605136475
          policy_loss: -0.015807174311743843
          total_loss: -0.01961106922891405
          vf_explained_var: -0.15178555250167847
          vf_loss: 0.0072423308673832155
    num_agent_steps_sampled: 769000
    num_agent_steps_trained: 769000
    num_steps_sampled: 769000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,769,19713.5,769000,-2.819,-2.17,-5.42,281.9


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 770000
  custom_metrics: {}
  date: 2021-11-05_18-01-07
  done: false
  episode_len_mean: 282.7
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.8269999999999835
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 2858
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2233664655559592
          cur_lr: 5.000000000000001e-05
          entropy: 1.3907150387763978
          entropy_coeff: 0.009999999999999998
          kl: 0.010635693561280767
          policy_loss: -0.12214564002222485
          total_loss: -0.11991064026951789
          vf_explained_var: 0.22065162658691406
          vf_loss: 0.013766491319984198
    num_agent_steps_sampled: 770000
    num_agent_steps_trained: 770000
    num_steps_sampled: 770000
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,770,19734.6,770000,-2.827,-2.17,-5.42,282.7


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 771000
  custom_metrics: {}
  date: 2021-11-05_18-01-28
  done: false
  episode_len_mean: 284.14
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.841399999999983
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 2862
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2233664655559592
          cur_lr: 5.000000000000001e-05
          entropy: 1.3062998957104153
          entropy_coeff: 0.009999999999999998
          kl: 0.015552580579722057
          policy_loss: -0.0021155771281984115
          total_loss: 0.002899026705159081
          vf_explained_var: 0.09656637161970139
          vf_loss: 0.014603678540637096
    num_agent_steps_sampled: 771000
    num_agent_steps_trained: 771000
    num_steps_sampled: 771000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,771,19755.7,771000,-2.8414,-2.17,-5.42,284.14


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 772000
  custom_metrics: {}
  date: 2021-11-05_18-01-50
  done: false
  episode_len_mean: 284.2
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.841999999999983
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 2865
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2233664655559592
          cur_lr: 5.000000000000001e-05
          entropy: 1.4671523835923936
          entropy_coeff: 0.009999999999999998
          kl: 0.009697832262587901
          policy_loss: -0.0367696268690957
          total_loss: -0.03791401187578837
          vf_explained_var: -0.32017046213150024
          vf_loss: 0.011360966284539448
    num_agent_steps_sampled: 772000
    num_agent_steps_trained: 772000
    num_steps_sampled: 772000
    num_steps_trained: 77

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,772,19777.9,772000,-2.842,-2.17,-5.42,284.2


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 773000
  custom_metrics: {}
  date: 2021-11-05_18-02-11
  done: false
  episode_len_mean: 285.65
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.8564999999999827
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 2868
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2233664655559592
          cur_lr: 5.000000000000001e-05
          entropy: 1.410506100124783
          entropy_coeff: 0.009999999999999998
          kl: 0.01019838958013616
          policy_loss: -0.10592001792457369
          total_loss: -0.10472449676858055
          vf_explained_var: 0.3381763994693756
          vf_loss: 0.013022605759195156
    num_agent_steps_sampled: 773000
    num_agent_steps_trained: 773000
    num_steps_sampled: 773000
    num_steps_trained: 773

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,773,19798.8,773000,-2.8565,-2.17,-5.42,285.65


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 774000
  custom_metrics: {}
  date: 2021-11-05_18-02-34
  done: false
  episode_len_mean: 286.72
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.8671999999999827
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 2872
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2233664655559592
          cur_lr: 5.000000000000001e-05
          entropy: 1.3572606378131442
          entropy_coeff: 0.009999999999999998
          kl: 0.006558924135900786
          policy_loss: 0.0028710462980800207
          total_loss: 0.0073868023024664985
          vf_explained_var: -0.027573050931096077
          vf_loss: 0.01662331567042404
    num_agent_steps_sampled: 774000
    num_agent_steps_trained: 774000
    num_steps_sampled: 774000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,774,19821.4,774000,-2.8672,-2.17,-5.42,286.72


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 775000
  custom_metrics: {}
  date: 2021-11-05_18-02-54
  done: false
  episode_len_mean: 286.37
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.8636999999999824
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 2875
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2233664655559592
          cur_lr: 5.000000000000001e-05
          entropy: 1.4883962088161045
          entropy_coeff: 0.009999999999999998
          kl: 0.007465263845922819
          policy_loss: 0.05944033414125442
          total_loss: 0.05614911706911193
          vf_explained_var: 0.13375386595726013
          vf_loss: 0.009925253213279778
    num_agent_steps_sampled: 775000
    num_agent_steps_trained: 775000
    num_steps_sampled: 775000
    num_steps_trained: 77

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,775,19842.1,775000,-2.8637,-2.17,-5.42,286.37


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 776000
  custom_metrics: {}
  date: 2021-11-05_18-03-17
  done: false
  episode_len_mean: 285.71
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.857099999999983
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 2878
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2233664655559592
          cur_lr: 5.000000000000001e-05
          entropy: 1.359617578983307
          entropy_coeff: 0.009999999999999998
          kl: 0.006050670091768653
          policy_loss: -0.11949675070742766
          total_loss: -0.11528049198289712
          vf_explained_var: 0.12398320436477661
          vf_loss: 0.0164609184074733
    num_agent_steps_sampled: 776000
    num_agent_steps_trained: 776000
    num_steps_sampled: 776000
    num_steps_trained: 7760

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,776,19864.4,776000,-2.8571,-2.17,-5.42,285.71




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 777000
  custom_metrics: {}
  date: 2021-11-05_18-03-56
  done: false
  episode_len_mean: 285.84
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.8583999999999827
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 2882
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2233664655559592
          cur_lr: 5.000000000000001e-05
          entropy: 1.3631976034906175
          entropy_coeff: 0.009999999999999998
          kl: 0.009818869260902316
          policy_loss: -0.0023314596050315432
          total_loss: 0.0019206951061884563
          vf_explained_var: 0.15682753920555115
          vf_loss: 0.015690922499116924
    num_agent_steps_sampled: 777000
    num_agent_steps_trained: 777000
    num_steps_sampled: 777000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,777,19903.9,777000,-2.8584,-2.17,-5.42,285.84


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 778000
  custom_metrics: {}
  date: 2021-11-05_18-04-18
  done: false
  episode_len_mean: 284.92
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.8491999999999837
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 2885
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2233664655559592
          cur_lr: 5.000000000000001e-05
          entropy: 1.4002493540445964
          entropy_coeff: 0.009999999999999998
          kl: 0.007187295402437618
          policy_loss: 0.05861038987835248
          total_loss: 0.0555802325407664
          vf_explained_var: 0.31177783012390137
          vf_loss: 0.00936693236645725
    num_agent_steps_sampled: 778000
    num_agent_steps_trained: 778000
    num_steps_sampled: 778000
    num_steps_trained: 7780

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,778,19925.4,778000,-2.8492,-2.17,-5.42,284.92


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 779000
  custom_metrics: {}
  date: 2021-11-05_18-04-39
  done: false
  episode_len_mean: 286.4
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.863999999999984
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 2888
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2233664655559592
          cur_lr: 5.000000000000001e-05
          entropy: 1.3894380119111802
          entropy_coeff: 0.009999999999999998
          kl: 0.016503086526056164
          policy_loss: -0.012494179192516539
          total_loss: -0.01324626902739207
          vf_explained_var: 0.3008328080177307
          vf_loss: 0.009456055189689828
    num_agent_steps_sampled: 779000
    num_agent_steps_trained: 779000
    num_steps_sampled: 779000
    num_steps_trained: 77

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,779,19946.3,779000,-2.864,-2.17,-5.42,286.4


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 780000
  custom_metrics: {}
  date: 2021-11-05_18-05-00
  done: false
  episode_len_mean: 288.95
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.889499999999982
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 2891
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2233664655559592
          cur_lr: 5.000000000000001e-05
          entropy: 1.4863057560390895
          entropy_coeff: 0.009999999999999998
          kl: 0.007914788166765952
          policy_loss: -0.06104878733555476
          total_loss: -0.06183238037758403
          vf_explained_var: 0.339527428150177
          vf_loss: 0.012311565441389878
    num_agent_steps_sampled: 780000
    num_agent_steps_trained: 780000
    num_steps_sampled: 780000
    num_steps_trained: 780

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,780,19967.2,780000,-2.8895,-2.17,-5.42,288.95


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 781000
  custom_metrics: {}
  date: 2021-11-05_18-05-19
  done: false
  episode_len_mean: 290.98
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.9097999999999815
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 2894
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2233664655559592
          cur_lr: 5.000000000000001e-05
          entropy: 1.4513760685920716
          entropy_coeff: 0.009999999999999998
          kl: 0.012407610704722868
          policy_loss: -0.12289843161900839
          total_loss: -0.12107687037852076
          vf_explained_var: 0.1200392097234726
          vf_loss: 0.01356387897911999
    num_agent_steps_sampled: 781000
    num_agent_steps_trained: 781000
    num_steps_sampled: 781000
    num_steps_trained: 78

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,781,19986.7,781000,-2.9098,-2.17,-5.42,290.98


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 782000
  custom_metrics: {}
  date: 2021-11-05_18-05-41
  done: false
  episode_len_mean: 292.63
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.926299999999982
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 2897
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2233664655559592
          cur_lr: 5.000000000000001e-05
          entropy: 1.40248404873742
          entropy_coeff: 0.009999999999999998
          kl: 0.0064968349082646015
          policy_loss: -0.12496831433640586
          total_loss: -0.12591536997093095
          vf_explained_var: 0.1948096752166748
          vf_loss: 0.011626611525813738
    num_agent_steps_sampled: 782000
    num_agent_steps_trained: 782000
    num_steps_sampled: 782000
    num_steps_trained: 782

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,782,20008.2,782000,-2.9263,-2.17,-5.42,292.63


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 783000
  custom_metrics: {}
  date: 2021-11-05_18-06-00
  done: false
  episode_len_mean: 295.49
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.9548999999999808
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 2900
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2233664655559592
          cur_lr: 5.000000000000001e-05
          entropy: 1.4245686358875698
          entropy_coeff: 0.009999999999999998
          kl: 0.009899285639392966
          policy_loss: -0.024008775254090627
          total_loss: -0.02675847328371472
          vf_explained_var: 0.06105940043926239
          vf_loss: 0.009284824562362499
    num_agent_steps_sampled: 783000
    num_agent_steps_trained: 783000
    num_steps_sampled: 783000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,783,20027.1,783000,-2.9549,-2.17,-5.42,295.49


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 784000
  custom_metrics: {}
  date: 2021-11-05_18-06-17
  done: false
  episode_len_mean: 299.32
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.9931999999999794
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 2903
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2233664655559592
          cur_lr: 5.000000000000001e-05
          entropy: 1.370574038558536
          entropy_coeff: 0.009999999999999998
          kl: 0.008903171729677141
          policy_loss: 0.07060721202029122
          total_loss: 0.06728857499029901
          vf_explained_var: 0.31582653522491455
          vf_loss: 0.008398431588041906
    num_agent_steps_sampled: 784000
    num_agent_steps_trained: 784000
    num_steps_sampled: 784000
    num_steps_trained: 784

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,784,20044.1,784000,-2.9932,-2.17,-5.42,299.32


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 785000
  custom_metrics: {}
  date: 2021-11-05_18-06-39
  done: false
  episode_len_mean: 300.3
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -3.0029999999999797
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 2906
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2233664655559592
          cur_lr: 5.000000000000001e-05
          entropy: 1.367403096622891
          entropy_coeff: 0.009999999999999998
          kl: 0.014980811956144811
          policy_loss: -0.04098607392774688
          total_loss: -0.04126012499133746
          vf_explained_var: 0.08521512895822525
          vf_loss: 0.010053769343843062
    num_agent_steps_sampled: 785000
    num_agent_steps_trained: 785000
    num_steps_sampled: 785000
    num_steps_trained: 78

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,785,20066.9,785000,-3.003,-2.17,-5.42,300.3




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 786000
  custom_metrics: {}
  date: 2021-11-05_18-07-18
  done: false
  episode_len_mean: 301.33
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -3.013299999999979
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 2910
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2233664655559592
          cur_lr: 5.000000000000001e-05
          entropy: 1.296750134891934
          entropy_coeff: 0.009999999999999998
          kl: 0.007183507349355859
          policy_loss: 0.01865517571568489
          total_loss: 0.020333340929614172
          vf_explained_var: 0.06395366787910461
          vf_loss: 0.013041111402627495
    num_agent_steps_sampled: 786000
    num_agent_steps_trained: 786000
    num_steps_sampled: 786000
    num_steps_trained: 786

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,786,20105.6,786000,-3.0133,-2.17,-5.42,301.33


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 787000
  custom_metrics: {}
  date: 2021-11-05_18-07-41
  done: false
  episode_len_mean: 299.47
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.99469999999998
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 2913
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2233664655559592
          cur_lr: 5.000000000000001e-05
          entropy: 1.3586480273140802
          entropy_coeff: 0.009999999999999998
          kl: 0.014027765168509157
          policy_loss: 0.023606303996509977
          total_loss: 0.023954964015218946
          vf_explained_var: -0.041526056826114655
          vf_loss: 0.010801808351081693
    num_agent_steps_sampled: 787000
    num_agent_steps_trained: 787000
    num_steps_sampled: 787000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,787,20128.8,787000,-2.9947,-2.17,-5.42,299.47


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 788000
  custom_metrics: {}
  date: 2021-11-05_18-08-03
  done: false
  episode_len_mean: 299.49
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.9948999999999795
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 2916
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2233664655559592
          cur_lr: 5.000000000000001e-05
          entropy: 1.336456122663286
          entropy_coeff: 0.009999999999999998
          kl: 0.03142943610511119
          policy_loss: -0.09687578454613685
          total_loss: -0.08855587525500191
          vf_explained_var: 0.06474050134420395
          vf_loss: 0.014664184519400198
    num_agent_steps_sampled: 788000
    num_agent_steps_trained: 788000
    num_steps_sampled: 788000
    num_steps_trained: 78

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,788,20150.8,788000,-2.9949,-2.17,-5.42,299.49


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 789000
  custom_metrics: {}
  date: 2021-11-05_18-08-25
  done: false
  episode_len_mean: 301.25
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -3.01249999999998
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 2920
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.33504969833393883
          cur_lr: 5.000000000000001e-05
          entropy: 1.2900714013311598
          entropy_coeff: 0.009999999999999998
          kl: 0.007055415478068659
          policy_loss: 0.02676745984289381
          total_loss: 0.027724961191415785
          vf_explained_var: 0.35907506942749023
          vf_loss: 0.011494297394528985
    num_agent_steps_sampled: 789000
    num_agent_steps_trained: 789000
    num_steps_sampled: 789000
    num_steps_trained: 78

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,789,20172.6,789000,-3.0125,-2.36,-5.42,301.25


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 790000
  custom_metrics: {}
  date: 2021-11-05_18-08-49
  done: false
  episode_len_mean: 300.95
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -3.00949999999998
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 2924
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.33504969833393883
          cur_lr: 5.000000000000001e-05
          entropy: 1.2030131253931258
          entropy_coeff: 0.009999999999999998
          kl: 0.008621549760895025
          policy_loss: -0.016976962155765957
          total_loss: -0.014239212622245152
          vf_explained_var: 0.21709850430488586
          vf_loss: 0.011879231832507584
    num_agent_steps_sampled: 790000
    num_agent_steps_trained: 790000
    num_steps_sampled: 790000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,790,20196.3,790000,-3.0095,-2.36,-5.42,300.95


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 791000
  custom_metrics: {}
  date: 2021-11-05_18-09-14
  done: false
  episode_len_mean: 300.69
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -3.00689999999998
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 3
  episodes_total: 2927
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.33504969833393883
          cur_lr: 5.000000000000001e-05
          entropy: 1.1148775729868148
          entropy_coeff: 0.009999999999999998
          kl: 0.007449987721390746
          policy_loss: -0.12340549495485094
          total_loss: -0.11908914496501287
          vf_explained_var: 0.25717616081237793
          vf_loss: 0.012969009961105055
    num_agent_steps_sampled: 791000
    num_agent_steps_trained: 791000
    num_steps_sampled: 791000
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,791,20220.8,791000,-3.0069,-2.36,-5.42,300.69


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 792000
  custom_metrics: {}
  date: 2021-11-05_18-09-37
  done: false
  episode_len_mean: 301.66
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -3.01659999999998
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 2931
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.33504969833393883
          cur_lr: 5.000000000000001e-05
          entropy: 1.1753251433372498
          entropy_coeff: 0.009999999999999998
          kl: 0.005715580278926306
          policy_loss: -0.016098953783512115
          total_loss: -0.013839138961500592
          vf_explained_var: 0.31699222326278687
          vf_loss: 0.01209805981359548
    num_agent_steps_sampled: 792000
    num_agent_steps_trained: 792000
    num_steps_sampled: 792000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,792,20244.5,792000,-3.0166,-2.36,-5.42,301.66


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 793000
  custom_metrics: {}
  date: 2021-11-05_18-10-02
  done: false
  episode_len_mean: 301.74
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.0173999999999803
  episode_reward_min: -5.419999999999929
  episodes_this_iter: 4
  episodes_total: 2935
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.33504969833393883
          cur_lr: 5.000000000000001e-05
          entropy: 0.898623702261183
          entropy_coeff: 0.009999999999999998
          kl: 0.004523003717603697
          policy_loss: 0.02159089135626952
          total_loss: 0.022678740529550448
          vf_explained_var: 0.5605959296226501
          vf_loss: 0.008558655742348896
    num_agent_steps_sampled: 793000
    num_agent_steps_trained: 793000
    num_steps_sampled: 793000
    num_steps_trained: 79

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,793,20269.4,793000,-3.0174,-2.32,-5.42,301.74


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 794000
  custom_metrics: {}
  date: 2021-11-05_18-10-27
  done: false
  episode_len_mean: 299.16
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -2.9915999999999796
  episode_reward_min: -5.179999999999934
  episodes_this_iter: 4
  episodes_total: 2939
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16752484916696941
          cur_lr: 5.000000000000001e-05
          entropy: 0.953329301542706
          entropy_coeff: 0.009999999999999998
          kl: 0.005222676120519844
          policy_loss: 0.03865501797861523
          total_loss: 0.038509388433562386
          vf_explained_var: 0.4377836585044861
          vf_loss: 0.008512738046960698
    num_agent_steps_sampled: 794000
    num_agent_steps_trained: 794000
    num_steps_sampled: 794000
    num_steps_trained: 79

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,794,20294.1,794000,-2.9916,-2.32,-5.18,299.16




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 795000
  custom_metrics: {}
  date: 2021-11-05_18-11-09
  done: false
  episode_len_mean: 295.16
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.951599999999981
  episode_reward_min: -5.179999999999934
  episodes_this_iter: 4
  episodes_total: 2943
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16752484916696941
          cur_lr: 5.000000000000001e-05
          entropy: 1.1246432099077437
          entropy_coeff: 0.009999999999999998
          kl: 0.006928409536120088
          policy_loss: 0.010190197991000282
          total_loss: 0.011168364187081654
          vf_explained_var: 0.3758835196495056
          vf_loss: 0.011063914301080837
    num_agent_steps_sampled: 795000
    num_agent_steps_trained: 795000
    num_steps_sampled: 795000
    num_steps_trained: 79

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,795,20336.3,795000,-2.9516,-2.21,-5.18,295.16


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 796000
  custom_metrics: {}
  date: 2021-11-05_18-11-36
  done: false
  episode_len_mean: 293.59
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.935899999999981
  episode_reward_min: -5.179999999999934
  episodes_this_iter: 4
  episodes_total: 2947
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16752484916696941
          cur_lr: 5.000000000000001e-05
          entropy: 0.6728730248080359
          entropy_coeff: 0.009999999999999998
          kl: 0.005180360149776438
          policy_loss: -0.024850479430622525
          total_loss: -0.02018624602092637
          vf_explained_var: 0.2863878607749939
          vf_loss: 0.010525123981965912
    num_agent_steps_sampled: 796000
    num_agent_steps_trained: 796000
    num_steps_sampled: 796000
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,796,20363.2,796000,-2.9359,-2.21,-5.18,293.59


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 797000
  custom_metrics: {}
  date: 2021-11-05_18-12-02
  done: false
  episode_len_mean: 291.56
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.9155999999999813
  episode_reward_min: -5.179999999999934
  episodes_this_iter: 4
  episodes_total: 2951
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16752484916696941
          cur_lr: 5.000000000000001e-05
          entropy: 0.82017217013571
          entropy_coeff: 0.009999999999999998
          kl: 0.003787271088383232
          policy_loss: -0.037931021468506916
          total_loss: -0.034804552296797436
          vf_explained_var: 0.29553601145744324
          vf_loss: 0.010693729968948497
    num_agent_steps_sampled: 797000
    num_agent_steps_trained: 797000
    num_steps_sampled: 797000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,797,20389.5,797000,-2.9156,-2.21,-5.18,291.56


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 798000
  custom_metrics: {}
  date: 2021-11-05_18-12-28
  done: false
  episode_len_mean: 289.13
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.8912999999999824
  episode_reward_min: -5.179999999999934
  episodes_this_iter: 5
  episodes_total: 2956
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08376242458348471
          cur_lr: 5.000000000000001e-05
          entropy: 0.8607541905509101
          entropy_coeff: 0.009999999999999998
          kl: 0.007262743837744602
          policy_loss: -0.01800140705373552
          total_loss: -0.01293543933166398
          vf_explained_var: 0.33831316232681274
          vf_loss: 0.013065163501434857
    num_agent_steps_sampled: 798000
    num_agent_steps_trained: 798000
    num_steps_sampled: 798000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,798,20415.5,798000,-2.8913,-2.21,-5.18,289.13


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 799000
  custom_metrics: {}
  date: 2021-11-05_18-12-55
  done: false
  episode_len_mean: 285.64
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.856399999999983
  episode_reward_min: -5.179999999999934
  episodes_this_iter: 4
  episodes_total: 2960
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08376242458348471
          cur_lr: 5.000000000000001e-05
          entropy: 0.5726959145731396
          entropy_coeff: 0.009999999999999998
          kl: 0.005073943471954104
          policy_loss: 0.034625702848037086
          total_loss: 0.03731703220142259
          vf_explained_var: 0.4054962694644928
          vf_loss: 0.007993279847626884
    num_agent_steps_sampled: 799000
    num_agent_steps_trained: 799000
    num_steps_sampled: 799000
    num_steps_trained: 799

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,799,20442.2,799000,-2.8564,-2.21,-5.18,285.64


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 800000
  custom_metrics: {}
  date: 2021-11-05_18-13-22
  done: false
  episode_len_mean: 283.8
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.837999999999983
  episode_reward_min: -5.179999999999934
  episodes_this_iter: 4
  episodes_total: 2964
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08376242458348471
          cur_lr: 5.000000000000001e-05
          entropy: 0.7808519681294759
          entropy_coeff: 0.009999999999999998
          kl: 0.009250294048304436
          policy_loss: -0.012210222582022348
          total_loss: -0.00926487719019254
          vf_explained_var: 0.16474056243896484
          vf_loss: 0.009979036641824576
    num_agent_steps_sampled: 800000
    num_agent_steps_trained: 800000
    num_steps_sampled: 800000
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,800,20468.7,800000,-2.838,-2.21,-5.18,283.8


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 801000
  custom_metrics: {}
  date: 2021-11-05_18-13-49
  done: false
  episode_len_mean: 280.66
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.8065999999999844
  episode_reward_min: -5.179999999999934
  episodes_this_iter: 4
  episodes_total: 2968
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08376242458348471
          cur_lr: 5.000000000000001e-05
          entropy: 0.6041460454463958
          entropy_coeff: 0.009999999999999998
          kl: 0.005212035906888069
          policy_loss: -0.022964764965905084
          total_loss: -0.01787692333261172
          vf_explained_var: 0.20752616226673126
          vf_loss: 0.010692732646647427
    num_agent_steps_sampled: 801000
    num_agent_steps_trained: 801000
    num_steps_sampled: 801000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,801,20496.4,801000,-2.8066,-2.21,-5.18,280.66




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 802000
  custom_metrics: {}
  date: 2021-11-05_18-14-33
  done: false
  episode_len_mean: 277.33
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.773299999999985
  episode_reward_min: -5.179999999999934
  episodes_this_iter: 5
  episodes_total: 2973
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08376242458348471
          cur_lr: 5.000000000000001e-05
          entropy: 0.8165534761216905
          entropy_coeff: 0.009999999999999998
          kl: 0.014019564379768685
          policy_loss: -0.007275371005137762
          total_loss: 0.0011014216476016574
          vf_explained_var: 0.18547822535037994
          vf_loss: 0.015368015132844448
    num_agent_steps_sampled: 802000
    num_agent_steps_trained: 802000
    num_steps_sampled: 802000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,802,20540,802000,-2.7733,-2.05,-5.18,277.33


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 803000
  custom_metrics: {}
  date: 2021-11-05_18-15-00
  done: false
  episode_len_mean: 274.03
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.7402999999999853
  episode_reward_min: -5.179999999999934
  episodes_this_iter: 4
  episodes_total: 2977
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08376242458348471
          cur_lr: 5.000000000000001e-05
          entropy: 0.5930538054969575
          entropy_coeff: 0.009999999999999998
          kl: 0.004351129236674625
          policy_loss: -0.0009958537502421272
          total_loss: 0.003504946455359459
          vf_explained_var: 0.09971295297145844
          vf_loss: 0.010066877031284902
    num_agent_steps_sampled: 803000
    num_agent_steps_trained: 803000
    num_steps_sampled: 803000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,803,20567.1,803000,-2.7403,-2.05,-5.18,274.03


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 804000
  custom_metrics: {}
  date: 2021-11-05_18-15-27
  done: false
  episode_len_mean: 271.95
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.719499999999986
  episode_reward_min: -5.179999999999934
  episodes_this_iter: 4
  episodes_total: 2981
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041881212291742353
          cur_lr: 5.000000000000001e-05
          entropy: 0.6251550436019897
          entropy_coeff: 0.009999999999999998
          kl: 0.012186563495413338
          policy_loss: -0.03953438591625955
          total_loss: -0.03361455500125885
          vf_explained_var: 0.14124265313148499
          vf_loss: 0.011660992809467845
    num_agent_steps_sampled: 804000
    num_agent_steps_trained: 804000
    num_steps_sampled: 804000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,804,20594.3,804000,-2.7195,-2.05,-5.18,271.95


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 805000
  custom_metrics: {}
  date: 2021-11-05_18-15-54
  done: false
  episode_len_mean: 267.55
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.675499999999986
  episode_reward_min: -5.179999999999934
  episodes_this_iter: 5
  episodes_total: 2986
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041881212291742353
          cur_lr: 5.000000000000001e-05
          entropy: 0.49865735405021244
          entropy_coeff: 0.009999999999999998
          kl: 0.009272636528621448
          policy_loss: -0.0028606900738345252
          total_loss: 0.006685577167405023
          vf_explained_var: 0.18558624386787415
          vf_loss: 0.014144491880304283
    num_agent_steps_sampled: 805000
    num_agent_steps_trained: 805000
    num_steps_sampled: 805000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,805,20620.8,805000,-2.6755,-2.05,-5.18,267.55


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 806000
  custom_metrics: {}
  date: 2021-11-05_18-16-21
  done: false
  episode_len_mean: 263.89
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.638899999999987
  episode_reward_min: -5.179999999999934
  episodes_this_iter: 4
  episodes_total: 2990
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041881212291742353
          cur_lr: 5.000000000000001e-05
          entropy: 0.46306292547119987
          entropy_coeff: 0.009999999999999998
          kl: 0.006254824144458718
          policy_loss: 0.01608143804801835
          total_loss: 0.02445525179306666
          vf_explained_var: 0.15471746027469635
          vf_loss: 0.012742483315782415
    num_agent_steps_sampled: 806000
    num_agent_steps_trained: 806000
    num_steps_sampled: 806000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,806,20648.1,806000,-2.6389,-2.05,-5.18,263.89


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 807000
  custom_metrics: {}
  date: 2021-11-05_18-16-49
  done: false
  episode_len_mean: 258.78
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.5877999999999886
  episode_reward_min: -5.179999999999934
  episodes_this_iter: 5
  episodes_total: 2995
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.041881212291742353
          cur_lr: 5.000000000000001e-05
          entropy: 0.4495999270015293
          entropy_coeff: 0.009999999999999998
          kl: 0.0024230540828886165
          policy_loss: -0.03006909191608429
          total_loss: -0.019542205913199318
          vf_explained_var: 0.1896563023328781
          vf_loss: 0.014921403903928068
    num_agent_steps_sampled: 807000
    num_agent_steps_trained: 807000
    num_steps_sampled: 807000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,807,20675.6,807000,-2.5878,-2.05,-5.18,258.78


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 808000
  custom_metrics: {}
  date: 2021-11-05_18-17-16
  done: false
  episode_len_mean: 254.52
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.5451999999999897
  episode_reward_min: -5.179999999999934
  episodes_this_iter: 4
  episodes_total: 2999
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.020940606145871177
          cur_lr: 5.000000000000001e-05
          entropy: 0.4544108132521311
          entropy_coeff: 0.009999999999999998
          kl: 0.006336739618483812
          policy_loss: 0.04496886548068788
          total_loss: 0.050085778906941415
          vf_explained_var: 0.21090993285179138
          vf_loss: 0.009528327298661073
    num_agent_steps_sampled: 808000
    num_agent_steps_trained: 808000
    num_steps_sampled: 808000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,808,20703.2,808000,-2.5452,-2.05,-5.18,254.52




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 809000
  custom_metrics: {}
  date: 2021-11-05_18-18-03
  done: false
  episode_len_mean: 247.23
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.4722999999999913
  episode_reward_min: -3.509999999999969
  episodes_this_iter: 5
  episodes_total: 3004
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.020940606145871177
          cur_lr: 5.000000000000001e-05
          entropy: 0.45254346198505824
          entropy_coeff: 0.009999999999999998
          kl: 0.004691855986678127
          policy_loss: -0.03419496913750966
          total_loss: -0.02226042126615842
          vf_explained_var: 0.09151872992515564
          vf_loss: 0.01636173346390327
    num_agent_steps_sampled: 809000
    num_agent_steps_trained: 809000
    num_steps_sampled: 809000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,809,20749.3,809000,-2.4723,-1.98,-3.51,247.23


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 810000
  custom_metrics: {}
  date: 2021-11-05_18-18-30
  done: false
  episode_len_mean: 245.14
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.451399999999991
  episode_reward_min: -3.509999999999969
  episodes_this_iter: 4
  episodes_total: 3008
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010470303072935588
          cur_lr: 5.000000000000001e-05
          entropy: 0.4577909234497282
          entropy_coeff: 0.009999999999999998
          kl: 0.01367540860180346
          policy_loss: 0.04094313805301984
          total_loss: 0.04802887969546848
          vf_explained_var: 0.18461504578590393
          vf_loss: 0.011520463745627138
    num_agent_steps_sampled: 810000
    num_agent_steps_trained: 810000
    num_steps_sampled: 810000
    num_steps_trained: 81

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,810,20776.3,810000,-2.4514,-1.98,-3.51,245.14


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 811000
  custom_metrics: {}
  date: 2021-11-05_18-18-57
  done: false
  episode_len_mean: 242.43
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.424299999999992
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 4
  episodes_total: 3012
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010470303072935588
          cur_lr: 5.000000000000001e-05
          entropy: 0.4848185979657703
          entropy_coeff: 0.009999999999999998
          kl: 0.009921824045369792
          policy_loss: -0.007789976067013211
          total_loss: -0.0018260383771525489
          vf_explained_var: 0.2959284484386444
          vf_loss: 0.010708240931853652
    num_agent_steps_sampled: 811000
    num_agent_steps_trained: 811000
    num_steps_sampled: 811000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,811,20804.1,811000,-2.4243,-1.98,-3.31,242.43


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 812000
  custom_metrics: {}
  date: 2021-11-05_18-19-24
  done: false
  episode_len_mean: 239.37
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.393699999999993
  episode_reward_min: -3.2499999999999747
  episodes_this_iter: 5
  episodes_total: 3017
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010470303072935588
          cur_lr: 5.000000000000001e-05
          entropy: 0.5859217951695125
          entropy_coeff: 0.009999999999999998
          kl: 0.013884421174144438
          policy_loss: 0.010807878772417704
          total_loss: 0.01913973804977205
          vf_explained_var: 0.40103834867477417
          vf_loss: 0.014045704000939926
    num_agent_steps_sampled: 812000
    num_agent_steps_trained: 812000
    num_steps_sampled: 812000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,812,20830.3,812000,-2.3937,-1.98,-3.25,239.37


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 813000
  custom_metrics: {}
  date: 2021-11-05_18-19-51
  done: false
  episode_len_mean: 237.35
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.3734999999999933
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 4
  episodes_total: 3021
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010470303072935588
          cur_lr: 5.000000000000001e-05
          entropy: 0.5677111350827747
          entropy_coeff: 0.009999999999999998
          kl: 0.0064404770252816075
          policy_loss: 0.00937121667795711
          total_loss: 0.015577724741564856
          vf_explained_var: 0.27919450402259827
          vf_loss: 0.011816182773974207
    num_agent_steps_sampled: 813000
    num_agent_steps_trained: 813000
    num_steps_sampled: 813000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,813,20857.2,813000,-2.3735,-1.98,-3.1,237.35


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 814000
  custom_metrics: {}
  date: 2021-11-05_18-20-17
  done: false
  episode_len_mean: 236.21
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.3620999999999936
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 4
  episodes_total: 3025
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010470303072935588
          cur_lr: 5.000000000000001e-05
          entropy: 0.6513604349560208
          entropy_coeff: 0.009999999999999998
          kl: 0.0182727070664266
          policy_loss: 0.022476518526673317
          total_loss: 0.026032057156165442
          vf_explained_var: 0.45185479521751404
          vf_loss: 0.009877821233951383
    num_agent_steps_sampled: 814000
    num_agent_steps_trained: 814000
    num_steps_sampled: 814000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,814,20883.6,814000,-2.3621,-1.98,-2.95,236.21


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 815000
  custom_metrics: {}
  date: 2021-11-05_18-20-44
  done: false
  episode_len_mean: 234.66
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.346599999999994
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 4
  episodes_total: 3029
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010470303072935588
          cur_lr: 5.000000000000001e-05
          entropy: 0.4601633386479484
          entropy_coeff: 0.009999999999999998
          kl: 0.010570453558397554
          policy_loss: -0.06138454361094369
          total_loss: -0.05221896982855267
          vf_explained_var: 0.2492941915988922
          vf_loss: 0.013656527714596854
    num_agent_steps_sampled: 815000
    num_agent_steps_trained: 815000
    num_steps_sampled: 815000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,815,20910.3,815000,-2.3466,-1.98,-2.95,234.66




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 816000
  custom_metrics: {}
  date: 2021-11-05_18-21-28
  done: false
  episode_len_mean: 233.74
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.337399999999994
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 5
  episodes_total: 3034
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010470303072935588
          cur_lr: 5.000000000000001e-05
          entropy: 0.6018125265836716
          entropy_coeff: 0.009999999999999998
          kl: 0.011170595109724325
          policy_loss: -0.07841594318548838
          total_loss: -0.06828570821219021
          vf_explained_var: 0.20220640301704407
          vf_loss: 0.016031402670260934
    num_agent_steps_sampled: 816000
    num_agent_steps_trained: 816000
    num_steps_sampled: 816000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,816,20954.4,816000,-2.3374,-1.98,-2.95,233.74


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 817000
  custom_metrics: {}
  date: 2021-11-05_18-21-54
  done: false
  episode_len_mean: 232.75
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.327499999999994
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 4
  episodes_total: 3038
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010470303072935588
          cur_lr: 5.000000000000001e-05
          entropy: 0.4308671676450305
          entropy_coeff: 0.009999999999999998
          kl: 0.017151004036796834
          policy_loss: 0.07864684462547303
          total_loss: 0.0830086869498094
          vf_explained_var: 0.4776745140552521
          vf_loss: 0.00849093849149843
    num_agent_steps_sampled: 817000
    num_agent_steps_trained: 817000
    num_steps_sampled: 817000
    num_steps_trained: 8170

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,817,20981,817000,-2.3275,-1.98,-2.95,232.75


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 818000
  custom_metrics: {}
  date: 2021-11-05_18-22-22
  done: false
  episode_len_mean: 231.97
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.319699999999994
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 4
  episodes_total: 3042
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010470303072935588
          cur_lr: 5.000000000000001e-05
          entropy: 0.5511813935306337
          entropy_coeff: 0.009999999999999998
          kl: 0.01956099224372273
          policy_loss: -0.020115860592987804
          total_loss: -0.013188548468881183
          vf_explained_var: 0.24170182645320892
          vf_loss: 0.01223431976719035
    num_agent_steps_sampled: 818000
    num_agent_steps_trained: 818000
    num_steps_sampled: 818000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,818,21008.1,818000,-2.3197,-1.98,-2.57,231.97


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 819000
  custom_metrics: {}
  date: 2021-11-05_18-22-48
  done: false
  episode_len_mean: 231.99
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.3198999999999943
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 4
  episodes_total: 3046
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010470303072935588
          cur_lr: 5.000000000000001e-05
          entropy: 0.5446534034278657
          entropy_coeff: 0.009999999999999998
          kl: 0.009592289159695927
          policy_loss: -0.08331525673468908
          total_loss: -0.07369834780693055
          vf_explained_var: 0.12139997631311417
          vf_loss: 0.014963006114380227
    num_agent_steps_sampled: 819000
    num_agent_steps_trained: 819000
    num_steps_sampled: 819000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,819,21034.7,819000,-2.3199,-1.98,-2.57,231.99


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 820000
  custom_metrics: {}
  date: 2021-11-05_18-23-15
  done: false
  episode_len_mean: 231.28
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.3127999999999944
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 5
  episodes_total: 3051
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010470303072935588
          cur_lr: 5.000000000000001e-05
          entropy: 0.42834463483757446
          entropy_coeff: 0.009999999999999998
          kl: 0.00981857498710096
          policy_loss: 0.01863467511203554
          total_loss: 0.029464833355612226
          vf_explained_var: 0.11267424374818802
          vf_loss: 0.015010798215452167
    num_agent_steps_sampled: 820000
    num_agent_steps_trained: 820000
    num_steps_sampled: 820000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,820,21061.6,820000,-2.3128,-1.98,-2.57,231.28


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 821000
  custom_metrics: {}
  date: 2021-11-05_18-23-42
  done: false
  episode_len_mean: 230.94
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.309399999999995
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 4
  episodes_total: 3055
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010470303072935588
          cur_lr: 5.000000000000001e-05
          entropy: 0.6352519624763064
          entropy_coeff: 0.009999999999999998
          kl: 0.04686809856366617
          policy_loss: 0.022866034300790893
          total_loss: 0.03059510990149445
          vf_explained_var: 0.13131798803806305
          vf_loss: 0.01359087130986154
    num_agent_steps_sampled: 821000
    num_agent_steps_trained: 821000
    num_steps_sampled: 821000
    num_steps_trained: 82

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,821,21088.6,821000,-2.3094,-1.98,-2.57,230.94




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 822000
  custom_metrics: {}
  date: 2021-11-05_18-24-26
  done: false
  episode_len_mean: 230.56
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.3055999999999948
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 5
  episodes_total: 3060
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.015705454609403376
          cur_lr: 5.000000000000001e-05
          entropy: 0.6116565969255235
          entropy_coeff: 0.009999999999999998
          kl: 0.06835606573074213
          policy_loss: -0.049550945808490114
          total_loss: -0.03758509755134583
          vf_explained_var: 0.14342331886291504
          vf_loss: 0.017008851882484223
    num_agent_steps_sampled: 822000
    num_agent_steps_trained: 822000
    num_steps_sampled: 822000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,822,21132.5,822000,-2.3056,-1.98,-2.57,230.56


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 823000
  custom_metrics: {}
  date: 2021-11-05_18-24-54
  done: false
  episode_len_mean: 230.33
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.303299999999995
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 4
  episodes_total: 3064
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.023558181914105077
          cur_lr: 5.000000000000001e-05
          entropy: 0.6615750001536476
          entropy_coeff: 0.009999999999999998
          kl: 0.01742214551254067
          policy_loss: 0.028014120790693493
          total_loss: 0.029788183752033445
          vf_explained_var: 0.3615072965621948
          vf_loss: 0.007979380662759973
    num_agent_steps_sampled: 823000
    num_agent_steps_trained: 823000
    num_steps_sampled: 823000
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,823,21160.8,823000,-2.3033,-1.98,-2.57,230.33


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 824000
  custom_metrics: {}
  date: 2021-11-05_18-25-21
  done: false
  episode_len_mean: 230.5
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.304999999999995
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 4
  episodes_total: 3068
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.023558181914105077
          cur_lr: 5.000000000000001e-05
          entropy: 0.5975214421749115
          entropy_coeff: 0.009999999999999998
          kl: 0.012243336815207487
          policy_loss: -0.018750560449229348
          total_loss: -0.014176471945312288
          vf_explained_var: 0.3488653004169464
          vf_loss: 0.010260873096477654
    num_agent_steps_sampled: 824000
    num_agent_steps_trained: 824000
    num_steps_sampled: 824000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,824,21187.6,824000,-2.305,-1.98,-2.57,230.5


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 825000
  custom_metrics: {}
  date: 2021-11-05_18-25-47
  done: false
  episode_len_mean: 230.77
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.307699999999995
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 4
  episodes_total: 3072
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.023558181914105077
          cur_lr: 5.000000000000001e-05
          entropy: 0.6280817283524407
          entropy_coeff: 0.009999999999999998
          kl: 0.00796923074461549
          policy_loss: -0.01907418312297927
          total_loss: -0.016873038022054566
          vf_explained_var: 0.6249817609786987
          vf_loss: 0.008294222498726514
    num_agent_steps_sampled: 825000
    num_agent_steps_trained: 825000
    num_steps_sampled: 825000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,825,21213.8,825000,-2.3077,-1.98,-2.57,230.77


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 826000
  custom_metrics: {}
  date: 2021-11-05_18-26-14
  done: false
  episode_len_mean: 230.96
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.3095999999999943
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 5
  episodes_total: 3077
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.023558181914105077
          cur_lr: 5.000000000000001e-05
          entropy: 0.46504084865252177
          entropy_coeff: 0.009999999999999998
          kl: 0.007469669169276787
          policy_loss: -0.030962422428031763
          total_loss: -0.025477686006989745
          vf_explained_var: 0.6583248376846313
          vf_loss: 0.009959174056227009
    num_agent_steps_sampled: 826000
    num_agent_steps_trained: 826000
    num_steps_sampled: 826000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,826,21240.7,826000,-2.3096,-1.98,-2.57,230.96


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 827000
  custom_metrics: {}
  date: 2021-11-05_18-26-42
  done: false
  episode_len_mean: 230.7
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.3069999999999946
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 4
  episodes_total: 3081
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.023558181914105077
          cur_lr: 5.000000000000001e-05
          entropy: 0.5279316922028859
          entropy_coeff: 0.009999999999999998
          kl: 0.016347441946901617
          policy_loss: -0.011265952967935138
          total_loss: -0.0053838740620348186
          vf_explained_var: 0.42663976550102234
          vf_loss: 0.010776275789572133
    num_agent_steps_sampled: 827000
    num_agent_steps_trained: 827000
    num_steps_sampled: 827000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,827,21267.9,827000,-2.307,-1.98,-2.57,230.7


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 828000
  custom_metrics: {}
  date: 2021-11-05_18-27-09
  done: false
  episode_len_mean: 230.55
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.3054999999999946
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 5
  episodes_total: 3086
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.023558181914105077
          cur_lr: 5.000000000000001e-05
          entropy: 0.4552470819817649
          entropy_coeff: 0.009999999999999998
          kl: 0.030059627203595892
          policy_loss: -0.016700523098309834
          total_loss: -0.009934637861119377
          vf_explained_var: 0.3616562783718109
          vf_loss: 0.010610207584169177
    num_agent_steps_sampled: 828000
    num_agent_steps_trained: 828000
    num_steps_sampled: 828000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,828,21295.4,828000,-2.3055,-1.98,-2.57,230.55




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 829000
  custom_metrics: {}
  date: 2021-11-05_18-27-53
  done: false
  episode_len_mean: 230.12
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.3011999999999944
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 4
  episodes_total: 3090
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.035337272871157614
          cur_lr: 5.000000000000001e-05
          entropy: 0.37949997981389366
          entropy_coeff: 0.009999999999999998
          kl: 0.005356823578196431
          policy_loss: 0.05037515378660626
          total_loss: 0.05411713628305329
          vf_explained_var: 0.5084518194198608
          vf_loss: 0.0073476872406899926
    num_agent_steps_sampled: 829000
    num_agent_steps_trained: 829000
    num_steps_sampled: 829000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,829,21339.2,829000,-2.3012,-1.98,-2.57,230.12


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 830000
  custom_metrics: {}
  date: 2021-11-05_18-28-22
  done: false
  episode_len_mean: 230.25
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.3024999999999944
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 4
  episodes_total: 3094
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.035337272871157614
          cur_lr: 5.000000000000001e-05
          entropy: 0.4111707631084654
          entropy_coeff: 0.009999999999999998
          kl: 0.005528664832987494
          policy_loss: -0.04056722666654322
          total_loss: -0.03723218871487512
          vf_explained_var: 0.6405726671218872
          vf_loss: 0.007251379949351152
    num_agent_steps_sampled: 830000
    num_agent_steps_trained: 830000
    num_steps_sampled: 830000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,830,21368.2,830000,-2.3025,-1.98,-2.57,230.25


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 831000
  custom_metrics: {}
  date: 2021-11-05_18-28-49
  done: false
  episode_len_mean: 230.2
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.3019999999999943
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 5
  episodes_total: 3099
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.035337272871157614
          cur_lr: 5.000000000000001e-05
          entropy: 0.33268476393487717
          entropy_coeff: 0.009999999999999998
          kl: 0.016507414898405966
          policy_loss: -0.028758159403999648
          total_loss: -0.022017945473392805
          vf_explained_var: 0.39050012826919556
          vf_loss: 0.009483733612837063
    num_agent_steps_sampled: 831000
    num_agent_steps_trained: 831000
    num_steps_sampled: 831000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,831,21395.5,831000,-2.302,-1.98,-2.57,230.2


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 832000
  custom_metrics: {}
  date: 2021-11-05_18-29-17
  done: false
  episode_len_mean: 230.19
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.3018999999999945
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 4
  episodes_total: 3103
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.035337272871157614
          cur_lr: 5.000000000000001e-05
          entropy: 0.25960993336306676
          entropy_coeff: 0.009999999999999998
          kl: 0.0021802956788172247
          policy_loss: -0.023271201964881687
          total_loss: -0.015878479141328068
          vf_explained_var: 0.28744107484817505
          vf_loss: 0.009911775061239799
    num_agent_steps_sampled: 832000
    num_agent_steps_trained: 832000
    num_steps_sampled: 832000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,832,21423.5,832000,-2.3019,-1.98,-2.57,230.19


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 833000
  custom_metrics: {}
  date: 2021-11-05_18-29-45
  done: false
  episode_len_mean: 230.0
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.299999999999995
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 5
  episodes_total: 3108
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.017668636435578807
          cur_lr: 5.000000000000001e-05
          entropy: 0.36290469302071465
          entropy_coeff: 0.009999999999999998
          kl: 0.022077650077890195
          policy_loss: -0.011147921201255587
          total_loss: -0.0026381535662545097
          vf_explained_var: 0.28290611505508423
          vf_loss: 0.011748732885138856
    num_agent_steps_sampled: 833000
    num_agent_steps_trained: 833000
    num_steps_sampled: 833000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,833,21450.9,833000,-2.3,-1.98,-2.57,230


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 834000
  custom_metrics: {}
  date: 2021-11-05_18-30-13
  done: false
  episode_len_mean: 229.81
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.2980999999999945
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 4
  episodes_total: 3112
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02650295465336821
          cur_lr: 5.000000000000001e-05
          entropy: 0.1985472281773885
          entropy_coeff: 0.009999999999999998
          kl: 0.0020527737931170503
          policy_loss: -0.06508293963140911
          total_loss: -0.05582443426052729
          vf_explained_var: 0.135917067527771
          vf_loss: 0.011189577314588759
    num_agent_steps_sampled: 834000
    num_agent_steps_trained: 834000
    num_steps_sampled: 834000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,834,21478.9,834000,-2.2981,-1.98,-2.57,229.81


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 835000
  custom_metrics: {}
  date: 2021-11-05_18-30-40
  done: false
  episode_len_mean: 229.35
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.293499999999995
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 5
  episodes_total: 3117
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.013251477326684104
          cur_lr: 5.000000000000001e-05
          entropy: 0.21648735602696736
          entropy_coeff: 0.009999999999999998
          kl: 0.0029718010305600727
          policy_loss: 0.009761363847388162
          total_loss: 0.01979800363381704
          vf_explained_var: 0.1384728103876114
          vf_loss: 0.012162134879165225
    num_agent_steps_sampled: 835000
    num_agent_steps_trained: 835000
    num_steps_sampled: 835000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,835,21506.4,835000,-2.2935,-1.98,-2.57,229.35




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 836000
  custom_metrics: {}
  date: 2021-11-05_18-31-25
  done: false
  episode_len_mean: 228.84
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.288399999999995
  episode_reward_min: -2.569999999999989
  episodes_this_iter: 4
  episodes_total: 3121
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006625738663342052
          cur_lr: 5.000000000000001e-05
          entropy: 0.2932117740313212
          entropy_coeff: 0.009999999999999998
          kl: 0.019157305457675338
          policy_loss: -0.06860183849930764
          total_loss: -0.059671909279293486
          vf_explained_var: 0.15141576528549194
          vf_loss: 0.011735116649005149
    num_agent_steps_sampled: 836000
    num_agent_steps_trained: 836000
    num_steps_sampled: 836000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,836,21551.2,836000,-2.2884,-1.91,-2.57,228.84


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 837000
  custom_metrics: {}
  date: 2021-11-05_18-31-53
  done: false
  episode_len_mean: 228.73
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.2872999999999952
  episode_reward_min: -2.719999999999986
  episodes_this_iter: 5
  episodes_total: 3126
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006625738663342052
          cur_lr: 5.000000000000001e-05
          entropy: 0.34401684635215335
          entropy_coeff: 0.009999999999999998
          kl: 0.1152877123792687
          policy_loss: -0.032611768609947626
          total_loss: -0.022723465909560522
          vf_explained_var: 0.29668793082237244
          vf_loss: 0.0125646045897156
    num_agent_steps_sampled: 837000
    num_agent_steps_trained: 837000
    num_steps_sampled: 837000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,837,21578.6,837000,-2.2873,-1.91,-2.72,228.73


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 838000
  custom_metrics: {}
  date: 2021-11-05_18-32-18
  done: false
  episode_len_mean: 229.4
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.293999999999995
  episode_reward_min: -2.899999999999982
  episodes_this_iter: 4
  episodes_total: 3130
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.009938607995013076
          cur_lr: 5.000000000000001e-05
          entropy: 0.9379243440098233
          entropy_coeff: 0.009999999999999998
          kl: 0.06161593388192348
          policy_loss: -0.009995838751395543
          total_loss: -0.004563929968410068
          vf_explained_var: 0.6709979772567749
          vf_loss: 0.014198774740927749
    num_agent_steps_sampled: 838000
    num_agent_steps_trained: 838000
    num_steps_sampled: 838000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,838,21603.7,838000,-2.294,-1.91,-2.9,229.4


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 839000
  custom_metrics: {}
  date: 2021-11-05_18-32-46
  done: false
  episode_len_mean: 229.1
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.290999999999995
  episode_reward_min: -2.899999999999982
  episodes_this_iter: 4
  episodes_total: 3134
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.014907911992519613
          cur_lr: 5.000000000000001e-05
          entropy: 0.599415260222223
          entropy_coeff: 0.009999999999999998
          kl: 0.05715085383606417
          policy_loss: -0.00015408645073572796
          total_loss: 0.00957800762520896
          vf_explained_var: 0.3089118003845215
          vf_loss: 0.014874245826568868
    num_agent_steps_sampled: 839000
    num_agent_steps_trained: 839000
    num_steps_sampled: 839000
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,839,21631.8,839000,-2.291,-1.91,-2.9,229.1


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 840000
  custom_metrics: {}
  date: 2021-11-05_18-33-14
  done: false
  episode_len_mean: 228.37
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.2836999999999947
  episode_reward_min: -2.899999999999982
  episodes_this_iter: 5
  episodes_total: 3139
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022361867988779427
          cur_lr: 5.000000000000001e-05
          entropy: 0.5590910992688602
          entropy_coeff: 0.009999999999999998
          kl: 0.07047416645790194
          policy_loss: 0.003500891269909011
          total_loss: 0.014180621090862487
          vf_explained_var: 0.28415539860725403
          vf_loss: 0.014694707851029104
    num_agent_steps_sampled: 840000
    num_agent_steps_trained: 840000
    num_steps_sampled: 840000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,840,21659.6,840000,-2.2837,-1.91,-2.9,228.37


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 841000
  custom_metrics: {}
  date: 2021-11-05_18-33-40
  done: false
  episode_len_mean: 228.56
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.285599999999995
  episode_reward_min: -2.899999999999982
  episodes_this_iter: 4
  episodes_total: 3143
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03354280198316913
          cur_lr: 5.000000000000001e-05
          entropy: 0.6387116743458642
          entropy_coeff: 0.009999999999999998
          kl: 0.018525878303975438
          policy_loss: 0.042559553268882964
          total_loss: 0.047049412669406994
          vf_explained_var: 0.3988599479198456
          vf_loss: 0.010255565038985677
    num_agent_steps_sampled: 841000
    num_agent_steps_trained: 841000
    num_steps_sampled: 841000
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,841,21685.7,841000,-2.2856,-1.91,-2.9,228.56


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 842000
  custom_metrics: {}
  date: 2021-11-05_18-34-05
  done: false
  episode_len_mean: 229.3
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.292999999999995
  episode_reward_min: -2.899999999999982
  episodes_this_iter: 4
  episodes_total: 3147
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03354280198316913
          cur_lr: 5.000000000000001e-05
          entropy: 0.7485008782810635
          entropy_coeff: 0.009999999999999998
          kl: 0.011211685248437044
          policy_loss: 0.03797257658508089
          total_loss: 0.040193872567680144
          vf_explained_var: 0.3578696846961975
          vf_loss: 0.009330228995531797
    num_agent_steps_sampled: 842000
    num_agent_steps_trained: 842000
    num_steps_sampled: 842000
    num_steps_trained: 842

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,842,21710.7,842000,-2.293,-1.91,-2.9,229.3




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 843000
  custom_metrics: {}
  date: 2021-11-05_18-34-48
  done: false
  episode_len_mean: 230.02
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.300199999999995
  episode_reward_min: -2.899999999999982
  episodes_this_iter: 4
  episodes_total: 3151
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03354280198316913
          cur_lr: 5.000000000000001e-05
          entropy: 1.0072992046674092
          entropy_coeff: 0.009999999999999998
          kl: 0.013723380697080106
          policy_loss: -0.016201780902014837
          total_loss: -0.017293054693275026
          vf_explained_var: 0.477030485868454
          vf_loss: 0.008521398689804806
    num_agent_steps_sampled: 843000
    num_agent_steps_trained: 843000
    num_steps_sampled: 843000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,843,21754.2,843000,-2.3002,-1.91,-2.9,230.02


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 844000
  custom_metrics: {}
  date: 2021-11-05_18-35-11
  done: false
  episode_len_mean: 231.16
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.311599999999994
  episode_reward_min: -3.0199999999999796
  episodes_this_iter: 3
  episodes_total: 3154
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03354280198316913
          cur_lr: 5.000000000000001e-05
          entropy: 1.4910854074690076
          entropy_coeff: 0.009999999999999998
          kl: 0.027569253343284097
          policy_loss: -0.11311816647648812
          total_loss: -0.11515341740515497
          vf_explained_var: 0.45117419958114624
          vf_loss: 0.011950857052579523
    num_agent_steps_sampled: 844000
    num_agent_steps_trained: 844000
    num_steps_sampled: 844000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,844,21776.5,844000,-2.3116,-1.91,-3.02,231.16


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 845000
  custom_metrics: {}
  date: 2021-11-05_18-35-34
  done: false
  episode_len_mean: 233.09
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.330899999999994
  episode_reward_min: -3.139999999999977
  episodes_this_iter: 4
  episodes_total: 3158
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.050314202974753704
          cur_lr: 5.000000000000001e-05
          entropy: 1.2678016583124796
          entropy_coeff: 0.009999999999999998
          kl: 0.02007046862527062
          policy_loss: -0.013023663560549419
          total_loss: -0.016541575723224216
          vf_explained_var: 0.5065488219261169
          vf_loss: 0.008150271978229285
    num_agent_steps_sampled: 845000
    num_agent_steps_trained: 845000
    num_steps_sampled: 845000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,845,21799.6,845000,-2.3309,-1.91,-3.14,233.09


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 846000
  custom_metrics: {}
  date: 2021-11-05_18-35-58
  done: false
  episode_len_mean: 234.94
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.349399999999994
  episode_reward_min: -3.139999999999977
  episodes_this_iter: 4
  episodes_total: 3162
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07547130446213056
          cur_lr: 5.000000000000001e-05
          entropy: 1.6486837916904025
          entropy_coeff: 0.009999999999999998
          kl: 0.010343617247741892
          policy_loss: 0.005481487347020043
          total_loss: -0.0023930951952934264
          vf_explained_var: 0.4682021737098694
          vf_loss: 0.007831606620715723
    num_agent_steps_sampled: 846000
    num_agent_steps_trained: 846000
    num_steps_sampled: 846000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,846,21823.8,846000,-2.3494,-1.91,-3.14,234.94


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 847000
  custom_metrics: {}
  date: 2021-11-05_18-36-22
  done: false
  episode_len_mean: 235.41
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.3540999999999936
  episode_reward_min: -3.139999999999977
  episodes_this_iter: 3
  episodes_total: 3165
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07547130446213056
          cur_lr: 5.000000000000001e-05
          entropy: 1.395774201552073
          entropy_coeff: 0.009999999999999998
          kl: 0.014569430973971838
          policy_loss: -0.13009472439686456
          total_loss: -0.12879159698883694
          vf_explained_var: 0.045612990856170654
          vf_loss: 0.014161296075003015
    num_agent_steps_sampled: 847000
    num_agent_steps_trained: 847000
    num_steps_sampled: 847000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,847,21847.8,847000,-2.3541,-1.91,-3.14,235.41


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 848000
  custom_metrics: {}
  date: 2021-11-05_18-36-47
  done: false
  episode_len_mean: 236.78
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.3677999999999937
  episode_reward_min: -3.139999999999977
  episodes_this_iter: 4
  episodes_total: 3169
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07547130446213056
          cur_lr: 5.000000000000001e-05
          entropy: 1.3951592908965216
          entropy_coeff: 0.009999999999999998
          kl: 0.012138769368409739
          policy_loss: -0.10635329716735416
          total_loss: -0.10735248807403776
          vf_explained_var: 0.3894627094268799
          vf_loss: 0.012036274430445498
    num_agent_steps_sampled: 848000
    num_agent_steps_trained: 848000
    num_steps_sampled: 848000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,848,21872.6,848000,-2.3678,-1.91,-3.14,236.78


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 849000
  custom_metrics: {}
  date: 2021-11-05_18-37-11
  done: false
  episode_len_mean: 237.27
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.3726999999999934
  episode_reward_min: -3.139999999999977
  episodes_this_iter: 4
  episodes_total: 3173
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07547130446213056
          cur_lr: 5.000000000000001e-05
          entropy: 1.3844862156444127
          entropy_coeff: 0.009999999999999998
          kl: 0.0135291036754904
          policy_loss: -0.08578303489420148
          total_loss: -0.08504080971082052
          vf_explained_var: 0.2913420796394348
          vf_loss: 0.013566031058629354
    num_agent_steps_sampled: 849000
    num_agent_steps_trained: 849000
    num_steps_sampled: 849000
    num_steps_trained: 84

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,849,21897,849000,-2.3727,-1.91,-3.14,237.27


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 850000
  custom_metrics: {}
  date: 2021-11-05_18-37-37
  done: false
  episode_len_mean: 238.15
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.3814999999999933
  episode_reward_min: -3.139999999999977
  episodes_this_iter: 5
  episodes_total: 3178
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07547130446213056
          cur_lr: 5.000000000000001e-05
          entropy: 1.2163456598917644
          entropy_coeff: 0.009999999999999998
          kl: 0.01069608851313859
          policy_loss: -0.0026635022626982797
          total_loss: 0.005265702224440045
          vf_explained_var: 0.32387393712997437
          vf_loss: 0.019285413974689112
    num_agent_steps_sampled: 850000
    num_agent_steps_trained: 850000
    num_steps_sampled: 850000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,850,21922.8,850000,-2.3815,-1.91,-3.14,238.15




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 851000
  custom_metrics: {}
  date: 2021-11-05_18-38-18
  done: false
  episode_len_mean: 238.94
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.389399999999993
  episode_reward_min: -3.139999999999977
  episodes_this_iter: 4
  episodes_total: 3182
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07547130446213056
          cur_lr: 5.000000000000001e-05
          entropy: 1.318812574280633
          entropy_coeff: 0.009999999999999998
          kl: 0.014758565698962419
          policy_loss: 0.04559251194198926
          total_loss: 0.0393730138325029
          vf_explained_var: 0.761688232421875
          vf_loss: 0.005854777577850554
    num_agent_steps_sampled: 851000
    num_agent_steps_trained: 851000
    num_steps_sampled: 851000
    num_steps_trained: 851000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,851,21963.4,851000,-2.3894,-1.91,-3.14,238.94


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 852000
  custom_metrics: {}
  date: 2021-11-05_18-38-43
  done: false
  episode_len_mean: 239.91
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.3990999999999927
  episode_reward_min: -3.139999999999977
  episodes_this_iter: 4
  episodes_total: 3186
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07547130446213056
          cur_lr: 5.000000000000001e-05
          entropy: 1.2384111139509413
          entropy_coeff: 0.009999999999999998
          kl: 0.01663011849112534
          policy_loss: -0.06998631705840429
          total_loss: -0.07113232596053018
          vf_explained_var: 0.3977656960487366
          vf_loss: 0.009983005995551745
    num_agent_steps_sampled: 852000
    num_agent_steps_trained: 852000
    num_steps_sampled: 852000
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,852,21988.3,852000,-2.3991,-1.91,-3.14,239.91


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 853000
  custom_metrics: {}
  date: 2021-11-05_18-39-09
  done: false
  episode_len_mean: 240.42
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.404199999999993
  episode_reward_min: -3.139999999999977
  episodes_this_iter: 4
  episodes_total: 3190
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07547130446213056
          cur_lr: 5.000000000000001e-05
          entropy: 1.2035782045788235
          entropy_coeff: 0.009999999999999998
          kl: 0.02357023522731995
          policy_loss: 0.03631191841430134
          total_loss: 0.033540382153458066
          vf_explained_var: 0.6210547089576721
          vf_loss: 0.007485368382185697
    num_agent_steps_sampled: 853000
    num_agent_steps_trained: 853000
    num_steps_sampled: 853000
    num_steps_trained: 853

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,853,22014.9,853000,-2.4042,-1.91,-3.14,240.42


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 854000
  custom_metrics: {}
  date: 2021-11-05_18-39-34
  done: false
  episode_len_mean: 241.23
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.4122999999999926
  episode_reward_min: -3.139999999999977
  episodes_this_iter: 4
  episodes_total: 3194
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11320695669319583
          cur_lr: 5.000000000000001e-05
          entropy: 1.2392691890398662
          entropy_coeff: 0.009999999999999998
          kl: 0.015028663125138269
          policy_loss: 0.0886290901237064
          total_loss: 0.0829209448562728
          vf_explained_var: 0.8167366981506348
          vf_loss: 0.00498319879019012
    num_agent_steps_sampled: 854000
    num_agent_steps_trained: 854000
    num_steps_sampled: 854000
    num_steps_trained: 85400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,854,22039.8,854000,-2.4123,-1.91,-3.14,241.23


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 855000
  custom_metrics: {}
  date: 2021-11-05_18-40-00
  done: false
  episode_len_mean: 241.86
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.418599999999992
  episode_reward_min: -3.139999999999977
  episodes_this_iter: 4
  episodes_total: 3198
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11320695669319583
          cur_lr: 5.000000000000001e-05
          entropy: 1.1952596267064413
          entropy_coeff: 0.009999999999999998
          kl: 0.005660836079147045
          policy_loss: -0.019756620211733713
          total_loss: -0.025610645446512433
          vf_explained_var: 0.7442896366119385
          vf_loss: 0.0054577258001599046
    num_agent_steps_sampled: 855000
    num_agent_steps_trained: 855000
    num_steps_sampled: 855000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,855,22065.2,855000,-2.4186,-1.91,-3.14,241.86


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 856000
  custom_metrics: {}
  date: 2021-11-05_18-40-25
  done: false
  episode_len_mean: 243.14
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.431399999999992
  episode_reward_min: -3.139999999999977
  episodes_this_iter: 4
  episodes_total: 3202
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11320695669319583
          cur_lr: 5.000000000000001e-05
          entropy: 1.2962601847118802
          entropy_coeff: 0.009999999999999998
          kl: 0.00726453046701781
          policy_loss: -0.013457635293404262
          total_loss: -0.01886938727564282
          vf_explained_var: 0.6388241052627563
          vf_loss: 0.006728454492986202
    num_agent_steps_sampled: 856000
    num_agent_steps_trained: 856000
    num_steps_sampled: 856000
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,856,22090.3,856000,-2.4314,-1.91,-3.14,243.14


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 857000
  custom_metrics: {}
  date: 2021-11-05_18-40-50
  done: false
  episode_len_mean: 244.28
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.442799999999992
  episode_reward_min: -3.139999999999977
  episodes_this_iter: 4
  episodes_total: 3206
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11320695669319583
          cur_lr: 5.000000000000001e-05
          entropy: 1.368667279349433
          entropy_coeff: 0.009999999999999998
          kl: 0.019661714712204573
          policy_loss: 0.016563585814502506
          total_loss: 0.010797403876980145
          vf_explained_var: 0.7080245018005371
          vf_loss: 0.005694647940496604
    num_agent_steps_sampled: 857000
    num_agent_steps_trained: 857000
    num_steps_sampled: 857000
    num_steps_trained: 85

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,857,22114.9,857000,-2.4428,-1.91,-3.14,244.28




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 858000
  custom_metrics: {}
  date: 2021-11-05_18-41-31
  done: false
  episode_len_mean: 245.44
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.4543999999999913
  episode_reward_min: -3.139999999999977
  episodes_this_iter: 4
  episodes_total: 3210
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11320695669319583
          cur_lr: 5.000000000000001e-05
          entropy: 1.4013218959172566
          entropy_coeff: 0.009999999999999998
          kl: 0.038487799182649954
          policy_loss: 0.018506585558255514
          total_loss: 0.017207670542928908
          vf_explained_var: 0.5346944332122803
          vf_loss: 0.008357215662383372
    num_agent_steps_sampled: 858000
    num_agent_steps_trained: 858000
    num_steps_sampled: 858000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,858,22156.2,858000,-2.4544,-1.91,-3.14,245.44


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 859000
  custom_metrics: {}
  date: 2021-11-05_18-41-54
  done: false
  episode_len_mean: 247.39
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.4738999999999907
  episode_reward_min: -3.139999999999977
  episodes_this_iter: 3
  episodes_total: 3213
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16981043503979376
          cur_lr: 5.000000000000001e-05
          entropy: 1.6768535878923205
          entropy_coeff: 0.009999999999999998
          kl: 0.013737201951578253
          policy_loss: -0.11662949207756254
          total_loss: -0.12193760656648212
          vf_explained_var: 0.18219219148159027
          vf_loss: 0.009127703919592832
    num_agent_steps_sampled: 859000
    num_agent_steps_trained: 859000
    num_steps_sampled: 859000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,859,22179.8,859000,-2.4739,-1.91,-3.14,247.39


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 860000
  custom_metrics: {}
  date: 2021-11-05_18-42-19
  done: false
  episode_len_mean: 249.24
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.492399999999991
  episode_reward_min: -3.139999999999977
  episodes_this_iter: 4
  episodes_total: 3217
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16981043503979376
          cur_lr: 5.000000000000001e-05
          entropy: 1.6324408306015863
          entropy_coeff: 0.009999999999999998
          kl: 0.020498956240265896
          policy_loss: -0.019832776486873628
          total_loss: -0.027242802580197654
          vf_explained_var: 0.6785871386528015
          vf_loss: 0.0054334457756744494
    num_agent_steps_sampled: 860000
    num_agent_steps_trained: 860000
    num_steps_sampled: 860000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,860,22204.1,860000,-2.4924,-1.91,-3.14,249.24


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 861000
  custom_metrics: {}
  date: 2021-11-05_18-42-41
  done: false
  episode_len_mean: 251.43
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.51429999999999
  episode_reward_min: -3.139999999999977
  episodes_this_iter: 4
  episodes_total: 3221
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.5666857004165649
          entropy_coeff: 0.009999999999999998
          kl: 0.008865488164072528
          policy_loss: 0.0659082221488158
          total_loss: 0.057754504432280855
          vf_explained_var: 0.782126784324646
          vf_loss: 0.005254961340688169
    num_agent_steps_sampled: 861000
    num_agent_steps_trained: 861000
    num_steps_sampled: 861000
    num_steps_trained: 861000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,861,22226.5,861000,-2.5143,-2.05,-3.14,251.43


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 862000
  custom_metrics: {}
  date: 2021-11-05_18-43-05
  done: false
  episode_len_mean: 252.74
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.52739999999999
  episode_reward_min: -3.139999999999977
  episodes_this_iter: 4
  episodes_total: 3225
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.571758430533939
          entropy_coeff: 0.009999999999999998
          kl: 0.0076500117728506385
          policy_loss: -0.02766017640630404
          total_loss: -0.03520284733838505
          vf_explained_var: 0.6669915318489075
          vf_loss: 0.006226337362184293
    num_agent_steps_sampled: 862000
    num_agent_steps_trained: 862000
    num_steps_sampled: 862000
    num_steps_trained: 862

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,862,22250.1,862000,-2.5274,-2.05,-3.14,252.74


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 863000
  custom_metrics: {}
  date: 2021-11-05_18-43-29
  done: false
  episode_len_mean: 253.67
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.5366999999999895
  episode_reward_min: -3.139999999999977
  episodes_this_iter: 3
  episodes_total: 3228
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.5467139456007215
          entropy_coeff: 0.009999999999999998
          kl: 0.012464295584766678
          policy_loss: 0.026840554301937422
          total_loss: 0.018550366577174928
          vf_explained_var: 0.8394758701324463
          vf_loss: 0.004002099401421017
    num_agent_steps_sampled: 863000
    num_agent_steps_trained: 863000
    num_steps_sampled: 863000
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,863,22274.1,863000,-2.5367,-2.05,-3.14,253.67


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 864000
  custom_metrics: {}
  date: 2021-11-05_18-43-50
  done: false
  episode_len_mean: 255.37
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.5536999999999894
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 4
  episodes_total: 3232
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.5561484747462802
          entropy_coeff: 0.009999999999999998
          kl: 0.00841953944252069
          policy_loss: 0.09730982399649091
          total_loss: 0.09036510421170128
          vf_explained_var: 0.7048001885414124
          vf_loss: 0.006472172241451012
    num_agent_steps_sampled: 864000
    num_agent_steps_trained: 864000
    num_steps_sampled: 864000
    num_steps_trained: 864

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,864,22295.2,864000,-2.5537,-2.05,-3.58,255.37


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 865000
  custom_metrics: {}
  date: 2021-11-05_18-44-11
  done: false
  episode_len_mean: 257.99
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.5798999999999888
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 3
  episodes_total: 3235
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.547810090912713
          entropy_coeff: 0.009999999999999998
          kl: 0.01182364953253187
          policy_loss: 0.12460564499100049
          total_loss: 0.11942329365346167
          vf_explained_var: 0.45332205295562744
          vf_loss: 0.007284078594400651
    num_agent_steps_sampled: 865000
    num_agent_steps_trained: 865000
    num_steps_sampled: 865000
    num_steps_trained: 865

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,865,22316.2,865000,-2.5799,-2.05,-3.58,257.99


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 866000
  custom_metrics: {}
  date: 2021-11-05_18-44-34
  done: false
  episode_len_mean: 260.25
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.6024999999999885
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 4
  episodes_total: 3239
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.4641332652833727
          entropy_coeff: 0.009999999999999998
          kl: 0.007986318135614233
          policy_loss: -0.01136075138217873
          total_loss: -0.014430887872974078
          vf_explained_var: 0.4433976411819458
          vf_loss: 0.009536955872964529
    num_agent_steps_sampled: 866000
    num_agent_steps_trained: 866000
    num_steps_sampled: 866000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,866,22339.4,866000,-2.6025,-2.05,-3.58,260.25




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 867000
  custom_metrics: {}
  date: 2021-11-05_18-45-15
  done: false
  episode_len_mean: 261.22
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.6121999999999876
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 3
  episodes_total: 3242
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.5494393163257174
          entropy_coeff: 0.009999999999999998
          kl: 0.010244268399662025
          policy_loss: 0.03956050193972058
          total_loss: 0.033529918640851974
          vf_explained_var: 0.5806854963302612
          vf_loss: 0.0068544296101511765
    num_agent_steps_sampled: 867000
    num_agent_steps_trained: 867000
    num_steps_sampled: 867000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,867,22380.4,867000,-2.6122,-2.05,-3.58,261.22


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 868000
  custom_metrics: {}
  date: 2021-11-05_18-45-37
  done: false
  episode_len_mean: 262.57
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.6256999999999873
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 4
  episodes_total: 3246
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.5352349135610792
          entropy_coeff: 0.009999999999999998
          kl: 0.013793333807437512
          policy_loss: 0.0019005493571360905
          total_loss: 0.002446101274755266
          vf_explained_var: 0.31084078550338745
          vf_loss: 0.01238452217852076
    num_agent_steps_sampled: 868000
    num_agent_steps_trained: 868000
    num_steps_sampled: 868000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,868,22402.2,868000,-2.6257,-2.05,-3.58,262.57


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 869000
  custom_metrics: {}
  date: 2021-11-05_18-45-57
  done: false
  episode_len_mean: 264.72
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.647199999999988
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 3
  episodes_total: 3249
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.576893444856008
          entropy_coeff: 0.009999999999999998
          kl: 0.010571142997432838
          policy_loss: 0.06538007275925742
          total_loss: 0.061705607341395484
          vf_explained_var: 0.0392029695212841
          vf_loss: 0.009401831973162997
    num_agent_steps_sampled: 869000
    num_agent_steps_trained: 869000
    num_steps_sampled: 869000
    num_steps_trained: 869

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,869,22422.5,869000,-2.6472,-2.05,-3.58,264.72


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 870000
  custom_metrics: {}
  date: 2021-11-05_18-46-20
  done: false
  episode_len_mean: 266.27
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.6626999999999867
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 3
  episodes_total: 3252
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.5747481717003717
          entropy_coeff: 0.009999999999999998
          kl: 0.00952317878700768
          policy_loss: -0.04904096225897471
          total_loss: -0.05039331598414315
          vf_explained_var: 0.06822970509529114
          vf_loss: 0.011969427624717355
    num_agent_steps_sampled: 870000
    num_agent_steps_trained: 870000
    num_steps_sampled: 870000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,870,22445.1,870000,-2.6627,-2.05,-3.58,266.27


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 871000
  custom_metrics: {}
  date: 2021-11-05_18-46-43
  done: false
  episode_len_mean: 265.26
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.652599999999987
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 4
  episodes_total: 3256
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.4981780886650085
          entropy_coeff: 0.009999999999999998
          kl: 0.007360729056449837
          policy_loss: 0.011886795527405208
          total_loss: 0.011895127760039436
          vf_explained_var: 0.26343056559562683
          vf_loss: 0.013115219016455942
    num_agent_steps_sampled: 871000
    num_agent_steps_trained: 871000
    num_steps_sampled: 871000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,871,22468.4,871000,-2.6526,-2.05,-3.58,265.26


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 872000
  custom_metrics: {}
  date: 2021-11-05_18-47-05
  done: false
  episode_len_mean: 266.06
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.6605999999999876
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 3
  episodes_total: 3259
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.546114238103231
          entropy_coeff: 0.009999999999999998
          kl: 0.011987218072714453
          policy_loss: -0.1085559477408727
          total_loss: -0.10773518226212925
          vf_explained_var: 0.24210429191589355
          vf_loss: 0.013228575761119524
    num_agent_steps_sampled: 872000
    num_agent_steps_trained: 872000
    num_steps_sampled: 872000
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,872,22490.1,872000,-2.6606,-2.05,-3.58,266.06


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 873000
  custom_metrics: {}
  date: 2021-11-05_18-47-27
  done: false
  episode_len_mean: 267.08
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.670799999999987
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 4
  episodes_total: 3263
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.475563281112247
          entropy_coeff: 0.009999999999999998
          kl: 0.011483580390281547
          policy_loss: 0.006875702821546131
          total_loss: 0.008044265458981197
          vf_explained_var: 0.21030838787555695
          vf_loss: 0.012999148294329643
    num_agent_steps_sampled: 873000
    num_agent_steps_trained: 873000
    num_steps_sampled: 873000
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,873,22511.5,873000,-2.6708,-2.05,-3.58,267.08


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 874000
  custom_metrics: {}
  date: 2021-11-05_18-47-49
  done: false
  episode_len_mean: 267.68
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.6767999999999863
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 3
  episodes_total: 3266
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.574102841483222
          entropy_coeff: 0.009999999999999998
          kl: 0.009439980110495582
          policy_loss: -0.06653829208678669
          total_loss: -0.06873404814137353
          vf_explained_var: 0.22337953746318817
          vf_loss: 0.011140763169775407
    num_agent_steps_sampled: 874000
    num_agent_steps_trained: 874000
    num_steps_sampled: 874000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,874,22534.1,874000,-2.6768,-2.05,-3.58,267.68




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 875000
  custom_metrics: {}
  date: 2021-11-05_18-48-28
  done: false
  episode_len_mean: 268.86
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.6885999999999863
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 4
  episodes_total: 3270
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.615172396765815
          entropy_coeff: 0.009999999999999998
          kl: 0.011892872967620368
          policy_loss: 0.023285404841105143
          total_loss: 0.023855508863925935
          vf_explained_var: 0.2473527491092682
          vf_loss: 0.01369252973753545
    num_agent_steps_sampled: 875000
    num_agent_steps_trained: 875000
    num_steps_sampled: 875000
    num_steps_trained: 87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,875,22572.8,875000,-2.6886,-2.05,-3.58,268.86


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 876000
  custom_metrics: {}
  date: 2021-11-05_18-48-51
  done: false
  episode_len_mean: 269.43
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.694299999999986
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 4
  episodes_total: 3274
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.6438138153817918
          entropy_coeff: 0.009999999999999998
          kl: 0.009929522529711143
          policy_loss: -0.00379564149512185
          total_loss: -0.005057315693961249
          vf_explained_var: 0.3549806475639343
          vf_loss: 0.01264725797292259
    num_agent_steps_sampled: 876000
    num_agent_steps_trained: 876000
    num_steps_sampled: 876000
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,876,22596.4,876000,-2.6943,-2.05,-3.58,269.43


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 877000
  custom_metrics: {}
  date: 2021-11-05_18-49-13
  done: false
  episode_len_mean: 270.89
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.7088999999999857
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 3
  episodes_total: 3277
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.6256073925230239
          entropy_coeff: 0.009999999999999998
          kl: 0.010660470318506062
          policy_loss: 0.03518023904826906
          total_loss: 0.0315967272553179
          vf_explained_var: 0.44433605670928955
          vf_loss: 0.009957170644904383
    num_agent_steps_sampled: 877000
    num_agent_steps_trained: 877000
    num_steps_sampled: 877000
    num_steps_trained: 87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,877,22618.2,877000,-2.7089,-2.05,-3.58,270.89


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 878000
  custom_metrics: {}
  date: 2021-11-05_18-49-34
  done: false
  episode_len_mean: 272.85
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.7284999999999853
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 3
  episodes_total: 3280
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.678285120593177
          entropy_coeff: 0.009999999999999998
          kl: 0.010266669287467936
          policy_loss: -0.02256092420882649
          total_loss: -0.02652433431810803
          vf_explained_var: 0.1328955739736557
          vf_loss: 0.010204361650782328
    num_agent_steps_sampled: 878000
    num_agent_steps_trained: 878000
    num_steps_sampled: 878000
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,878,22638.4,878000,-2.7285,-2.05,-3.58,272.85


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 879000
  custom_metrics: {}
  date: 2021-11-05_18-49-56
  done: false
  episode_len_mean: 274.66
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.7465999999999853
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 4
  episodes_total: 3284
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.7167152868376838
          entropy_coeff: 0.009999999999999998
          kl: 0.014530684200437191
          policy_loss: -0.0076483114726013605
          total_loss: -0.007515405056377252
          vf_explained_var: 0.28174638748168945
          vf_loss: 0.013598867981798119
    num_agent_steps_sampled: 879000
    num_agent_steps_trained: 879000
    num_steps_sampled: 879000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,879,22660.6,879000,-2.7466,-2.05,-3.58,274.66


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 880000
  custom_metrics: {}
  date: 2021-11-05_18-50-16
  done: false
  episode_len_mean: 276.85
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.7684999999999844
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 3
  episodes_total: 3287
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.7517979635132683
          entropy_coeff: 0.009999999999999998
          kl: 0.007324272280988551
          policy_loss: 0.03959814012050629
          total_loss: 0.033617497401105034
          vf_explained_var: 0.3379432260990143
          vf_loss: 0.009671729296031925
    num_agent_steps_sampled: 880000
    num_agent_steps_trained: 880000
    num_steps_sampled: 880000
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,880,22681.3,880000,-2.7685,-2.05,-3.58,276.85


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 881000
  custom_metrics: {}
  date: 2021-11-05_18-50-36
  done: false
  episode_len_mean: 279.75
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.7974999999999848
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 3
  episodes_total: 3290
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.7214514679378934
          entropy_coeff: 0.009999999999999998
          kl: 0.010826171573401508
          policy_loss: 0.022607831160227458
          total_loss: 0.019208082722293005
          vf_explained_var: 0.1253606528043747
          vf_loss: 0.011057169329271548
    num_agent_steps_sampled: 881000
    num_agent_steps_trained: 881000
    num_steps_sampled: 881000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,881,22701.1,881000,-2.7975,-2.05,-3.58,279.75


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 882000
  custom_metrics: {}
  date: 2021-11-05_18-50-56
  done: false
  episode_len_mean: 282.37
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.8236999999999837
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 3
  episodes_total: 3293
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.7016852736473083
          entropy_coeff: 0.009999999999999998
          kl: 0.010407506955902493
          policy_loss: 0.03253070132599937
          total_loss: 0.025533320009708406
          vf_explained_var: 0.4302399456501007
          vf_loss: 0.007368514922887294
    num_agent_steps_sampled: 882000
    num_agent_steps_trained: 882000
    num_steps_sampled: 882000
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,882,22720.8,882000,-2.8237,-2.05,-3.58,282.37


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 883000
  custom_metrics: {}
  date: 2021-11-05_18-51-16
  done: false
  episode_len_mean: 284.83
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.848299999999983
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 3
  episodes_total: 3296
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.637303728527493
          entropy_coeff: 0.009999999999999998
          kl: 0.005827672272219549
          policy_loss: 0.061265324056148526
          total_loss: 0.054318153361479445
          vf_explained_var: 0.10455548018217087
          vf_loss: 0.007941465629523413
    num_agent_steps_sampled: 883000
    num_agent_steps_trained: 883000
    num_steps_sampled: 883000
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,883,22740.2,883000,-2.8483,-2.05,-3.58,284.83


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 884000
  custom_metrics: {}
  date: 2021-11-05_18-51-35
  done: false
  episode_len_mean: 287.02
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.8701999999999828
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 3
  episodes_total: 3299
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.7026001784536573
          entropy_coeff: 0.009999999999999998
          kl: 0.01059734155078505
          policy_loss: 0.03417990456024806
          total_loss: 0.03194184435738458
          vf_explained_var: -0.012281354516744614
          vf_loss: 0.012088632830677347
    num_agent_steps_sampled: 884000
    num_agent_steps_trained: 884000
    num_steps_sampled: 884000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,884,22760.2,884000,-2.8702,-2.05,-3.58,287.02




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 885000
  custom_metrics: {}
  date: 2021-11-05_18-52-16
  done: false
  episode_len_mean: 288.39
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.883899999999983
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 3
  episodes_total: 3302
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.6481511327955458
          entropy_coeff: 0.009999999999999998
          kl: 0.017089730723320113
          policy_loss: -0.12992402058508662
          total_loss: -0.12633435510926777
          vf_explained_var: 0.3098616600036621
          vf_loss: 0.015718156564980747
    num_agent_steps_sampled: 885000
    num_agent_steps_trained: 885000
    num_steps_sampled: 885000
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,885,22800.2,885000,-2.8839,-2.05,-3.58,288.39


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 886000
  custom_metrics: {}
  date: 2021-11-05_18-52-38
  done: false
  episode_len_mean: 289.96
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.8995999999999817
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 4
  episodes_total: 3306
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.5388593806160822
          entropy_coeff: 0.009999999999999998
          kl: 0.019528599784639607
          policy_loss: 0.01682666622930103
          total_loss: 0.024017524388101365
          vf_explained_var: 0.11109840869903564
          vf_loss: 0.01760521342770921
    num_agent_steps_sampled: 886000
    num_agent_steps_trained: 886000
    num_steps_sampled: 886000
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,886,22822.3,886000,-2.8996,-2.05,-3.58,289.96


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 887000
  custom_metrics: {}
  date: 2021-11-05_18-53-01
  done: false
  episode_len_mean: 290.53
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -2.9052999999999813
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 4
  episodes_total: 3310
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.5964153091112772
          entropy_coeff: 0.009999999999999998
          kl: 0.008979377810459255
          policy_loss: 0.017345563405089907
          total_loss: 0.017188585880729888
          vf_explained_var: 0.19267569482326508
          vf_loss: 0.01351998457685113
    num_agent_steps_sampled: 887000
    num_agent_steps_trained: 887000
    num_steps_sampled: 887000
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,887,22846,887000,-2.9053,-2.39,-3.58,290.53


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 888000
  custom_metrics: {}
  date: 2021-11-05_18-53-24
  done: false
  episode_len_mean: 290.42
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -2.904199999999982
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 3
  episodes_total: 3313
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.6708065774705676
          entropy_coeff: 0.009999999999999998
          kl: 0.009234535428867869
          policy_loss: -0.027127057313919067
          total_loss: -0.031630712664789624
          vf_explained_var: 0.2554093897342682
          vf_loss: 0.009852233505807818
    num_agent_steps_sampled: 888000
    num_agent_steps_trained: 888000
    num_steps_sampled: 888000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,888,22868.6,888000,-2.9042,-2.39,-3.58,290.42


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 889000
  custom_metrics: {}
  date: 2021-11-05_18-53-46
  done: false
  episode_len_mean: 291.26
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -2.9125999999999816
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 4
  episodes_total: 3317
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.6342284149593778
          entropy_coeff: 0.009999999999999998
          kl: 0.010120948280227221
          policy_loss: -0.04297548586295711
          total_loss: -0.043599194495214356
          vf_explained_var: 0.3631272614002228
          vf_loss: 0.013140607728726335
    num_agent_steps_sampled: 889000
    num_agent_steps_trained: 889000
    num_steps_sampled: 889000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,889,22890.2,889000,-2.9126,-2.39,-3.58,291.26


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 890000
  custom_metrics: {}
  date: 2021-11-05_18-54-09
  done: false
  episode_len_mean: 291.64
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -2.9163999999999817
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 3
  episodes_total: 3320
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.7339429643419053
          entropy_coeff: 0.009999999999999998
          kl: 0.012135129611023861
          policy_loss: -0.033355946838855746
          total_loss: -0.03598653951452838
          vf_explained_var: -0.022371700033545494
          vf_loss: 0.01161782706880735
    num_agent_steps_sampled: 890000
    num_agent_steps_trained: 890000
    num_steps_sampled: 890000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,890,22913.3,890000,-2.9164,-2.39,-3.58,291.64


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 891000
  custom_metrics: {}
  date: 2021-11-05_18-54-30
  done: false
  episode_len_mean: 292.44
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -2.9243999999999812
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 3
  episodes_total: 3323
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.6284349507755704
          entropy_coeff: 0.009999999999999998
          kl: 0.011948467271723117
          policy_loss: -0.1282058777908484
          total_loss: -0.12657675676875643
          vf_explained_var: 0.11679712682962418
          vf_loss: 0.014870008257114225
    num_agent_steps_sampled: 891000
    num_agent_steps_trained: 891000
    num_steps_sampled: 891000
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,891,22934.1,891000,-2.9244,-2.39,-3.58,292.44


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 892000
  custom_metrics: {}
  date: 2021-11-05_18-54-50
  done: false
  episode_len_mean: 294.7
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -2.946999999999981
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 4
  episodes_total: 3327
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.5935389836629232
          entropy_coeff: 0.009999999999999998
          kl: 0.007430459669294325
          policy_loss: -0.0008540685806009505
          total_loss: -0.0016739240951008268
          vf_explained_var: 0.24882648885250092
          vf_loss: 0.01322287858153383
    num_agent_steps_sampled: 892000
    num_agent_steps_trained: 892000
    num_steps_sampled: 892000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,892,22954.2,892000,-2.947,-2.39,-3.58,294.7




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 893000
  custom_metrics: {}
  date: 2021-11-05_18-55-28
  done: false
  episode_len_mean: 295.53
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -2.9552999999999816
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 3
  episodes_total: 3330
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.608926154507531
          entropy_coeff: 0.009999999999999998
          kl: 0.012399587522532821
          policy_loss: 0.035800369746155206
          total_loss: 0.03435657976402177
          vf_explained_var: 0.11897265911102295
          vf_loss: 0.011487101825575033
    num_agent_steps_sampled: 893000
    num_agent_steps_trained: 893000
    num_steps_sampled: 893000
    num_steps_trained: 89

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,893,22992.5,893000,-2.9553,-2.39,-3.58,295.53


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 894000
  custom_metrics: {}
  date: 2021-11-05_18-55-51
  done: false
  episode_len_mean: 294.75
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -2.947499999999981
  episode_reward_min: -3.5499999999999683
  episodes_this_iter: 4
  episodes_total: 3334
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.5374011821217006
          entropy_coeff: 0.009999999999999998
          kl: 0.010287708842656389
          policy_loss: -0.017446987455089887
          total_loss: -0.020088089174694485
          vf_explained_var: 0.386641263961792
          vf_loss: 0.010112467558226652
    num_agent_steps_sampled: 894000
    num_agent_steps_trained: 894000
    num_steps_sampled: 894000
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,894,23015.2,894000,-2.9475,-2.39,-3.55,294.75


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 895000
  custom_metrics: {}
  date: 2021-11-05_18-56-13
  done: false
  episode_len_mean: 294.03
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -2.940299999999981
  episode_reward_min: -3.519999999999969
  episodes_this_iter: 3
  episodes_total: 3337
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2547156525596906
          cur_lr: 5.000000000000001e-05
          entropy: 1.6537210901578268
          entropy_coeff: 0.009999999999999998
          kl: 0.025858222975572272
          policy_loss: 0.03148163863354259
          total_loss: 0.02799846273329523
          vf_explained_var: 0.6013302206993103
          vf_loss: 0.00646753877065041
    num_agent_steps_sampled: 895000
    num_agent_steps_trained: 895000
    num_steps_sampled: 895000
    num_steps_trained: 895000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,895,23037.3,895000,-2.9403,-2.39,-3.52,294.03


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 896000
  custom_metrics: {}
  date: 2021-11-05_18-56-36
  done: false
  episode_len_mean: 294.02
  episode_media: {}
  episode_reward_max: -2.5899999999999888
  episode_reward_mean: -2.9401999999999804
  episode_reward_min: -3.519999999999969
  episodes_this_iter: 4
  episodes_total: 3341
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.382073478839536
          cur_lr: 5.000000000000001e-05
          entropy: 1.7284239967664083
          entropy_coeff: 0.009999999999999998
          kl: 0.012007583425873571
          policy_loss: 0.0481396901110808
          total_loss: 0.04824020697010888
          vf_explained_var: 0.3637627959251404
          vf_loss: 0.012796976826050216
    num_agent_steps_sampled: 896000
    num_agent_steps_trained: 896000
    num_steps_sampled: 896000
    num_steps_trained: 89600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,896,23060.6,896000,-2.9402,-2.59,-3.52,294.02


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 897000
  custom_metrics: {}
  date: 2021-11-05_18-56-57
  done: false
  episode_len_mean: 294.51
  episode_media: {}
  episode_reward_max: -2.5999999999999885
  episode_reward_mean: -2.9450999999999805
  episode_reward_min: -3.519999999999969
  episodes_this_iter: 3
  episodes_total: 3344
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.382073478839536
          cur_lr: 5.000000000000001e-05
          entropy: 1.6227880835533142
          entropy_coeff: 0.009999999999999998
          kl: 0.027461264637406065
          policy_loss: -0.03711914436684714
          total_loss: -0.0299081661634975
          vf_explained_var: 0.037662334740161896
          vf_loss: 0.012946640165884876
    num_agent_steps_sampled: 897000
    num_agent_steps_trained: 897000
    num_steps_sampled: 897000
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,897,23081.7,897000,-2.9451,-2.6,-3.52,294.51


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 898000
  custom_metrics: {}
  date: 2021-11-05_18-57-19
  done: false
  episode_len_mean: 295.16
  episode_media: {}
  episode_reward_max: -2.5999999999999885
  episode_reward_mean: -2.951599999999981
  episode_reward_min: -3.519999999999969
  episodes_this_iter: 3
  episodes_total: 3347
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5731102182593037
          cur_lr: 5.000000000000001e-05
          entropy: 1.5859279659059313
          entropy_coeff: 0.009999999999999998
          kl: 0.007352700470714227
          policy_loss: -0.12247067135241296
          total_loss: -0.12209949327839745
          vf_explained_var: 0.4596259295940399
          vf_loss: 0.012016549194231629
    num_agent_steps_sampled: 898000
    num_agent_steps_trained: 898000
    num_steps_sampled: 898000
    num_steps_trained: 89

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,898,23103.7,898000,-2.9516,-2.6,-3.52,295.16


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 899000
  custom_metrics: {}
  date: 2021-11-05_18-57-42
  done: false
  episode_len_mean: 293.79
  episode_media: {}
  episode_reward_max: -2.5999999999999885
  episode_reward_mean: -2.937899999999981
  episode_reward_min: -3.519999999999969
  episodes_this_iter: 4
  episodes_total: 3351
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5731102182593037
          cur_lr: 5.000000000000001e-05
          entropy: 1.6688928021325005
          entropy_coeff: 0.009999999999999998
          kl: 0.005590290073372885
          policy_loss: 0.019428276187843745
          total_loss: 0.019070842986305556
          vf_explained_var: 0.4016180634498596
          vf_loss: 0.01312764205245508
    num_agent_steps_sampled: 899000
    num_agent_steps_trained: 899000
    num_steps_sampled: 899000
    num_steps_trained: 899

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,899,23126.3,899000,-2.9379,-2.6,-3.52,293.79


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 900000
  custom_metrics: {}
  date: 2021-11-05_18-58-06
  done: false
  episode_len_mean: 293.43
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -2.9342999999999813
  episode_reward_min: -3.519999999999969
  episodes_this_iter: 4
  episodes_total: 3355
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5731102182593037
          cur_lr: 5.000000000000001e-05
          entropy: 1.579340218173133
          entropy_coeff: 0.009999999999999998
          kl: 0.008275327851673116
          policy_loss: 0.036073017203145555
          total_loss: 0.03818808578782611
          vf_explained_var: 0.3284836411476135
          vf_loss: 0.013165796905135115
    num_agent_steps_sampled: 900000
    num_agent_steps_trained: 900000
    num_steps_sampled: 900000
    num_steps_trained: 9000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,900,23149.9,900000,-2.9343,-2.43,-3.52,293.43


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 901000
  custom_metrics: {}
  date: 2021-11-05_18-58-28
  done: false
  episode_len_mean: 293.31
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -2.9330999999999805
  episode_reward_min: -3.519999999999969
  episodes_this_iter: 3
  episodes_total: 3358
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5731102182593037
          cur_lr: 5.000000000000001e-05
          entropy: 1.6093304395675658
          entropy_coeff: 0.009999999999999998
          kl: 0.007475034296268746
          policy_loss: -0.001567874848842621
          total_loss: -0.004086676571104262
          vf_explained_var: 0.3990222215652466
          vf_loss: 0.009290483724584596
    num_agent_steps_sampled: 901000
    num_agent_steps_trained: 901000
    num_steps_sampled: 901000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,901,23172.4,901000,-2.9331,-2.43,-3.52,293.31




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 902000
  custom_metrics: {}
  date: 2021-11-05_18-59-09
  done: false
  episode_len_mean: 292.23
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.9222999999999812
  episode_reward_min: -3.519999999999969
  episodes_this_iter: 4
  episodes_total: 3362
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5731102182593037
          cur_lr: 5.000000000000001e-05
          entropy: 1.6270959244834051
          entropy_coeff: 0.009999999999999998
          kl: 0.008748746231539744
          policy_loss: -0.03336344535152117
          total_loss: -0.03454682048824098
          vf_explained_var: 0.5769054889678955
          vf_loss: 0.010073587344959379
    num_agent_steps_sampled: 902000
    num_agent_steps_trained: 902000
    num_steps_sampled: 902000
    num_steps_trained: 90

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,902,23212.9,902000,-2.9223,-2.3,-3.52,292.23


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 903000
  custom_metrics: {}
  date: 2021-11-05_18-59-33
  done: false
  episode_len_mean: 291.35
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.9134999999999813
  episode_reward_min: -3.519999999999969
  episodes_this_iter: 4
  episodes_total: 3366
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5731102182593037
          cur_lr: 5.000000000000001e-05
          entropy: 1.5811733961105348
          entropy_coeff: 0.009999999999999998
          kl: 0.009211694685510155
          policy_loss: 0.004944834858179092
          total_loss: 0.006858269042438931
          vf_explained_var: 0.37504905462265015
          vf_loss: 0.012445848517947727
    num_agent_steps_sampled: 903000
    num_agent_steps_trained: 903000
    num_steps_sampled: 903000
    num_steps_trained: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,903,23237,903000,-2.9135,-2.3,-3.52,291.35


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 904000
  custom_metrics: {}
  date: 2021-11-05_18-59-57
  done: false
  episode_len_mean: 290.61
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.9060999999999813
  episode_reward_min: -3.519999999999969
  episodes_this_iter: 4
  episodes_total: 3370
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5731102182593037
          cur_lr: 5.000000000000001e-05
          entropy: 1.65465477042728
          entropy_coeff: 0.009999999999999998
          kl: 0.007695142776595137
          policy_loss: -0.013139593642618921
          total_loss: -0.013941914670997195
          vf_explained_var: 0.5570429563522339
          vf_loss: 0.01133406432862911
    num_agent_steps_sampled: 904000
    num_agent_steps_trained: 904000
    num_steps_sampled: 904000
    num_steps_trained: 904

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,904,23261,904000,-2.9061,-2.3,-3.52,290.61


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 905000
  custom_metrics: {}
  date: 2021-11-05_19-00-21
  done: false
  episode_len_mean: 290.19
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.901899999999982
  episode_reward_min: -3.519999999999969
  episodes_this_iter: 3
  episodes_total: 3373
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5731102182593037
          cur_lr: 5.000000000000001e-05
          entropy: 1.608413565158844
          entropy_coeff: 0.009999999999999998
          kl: 0.009657717136663043
          policy_loss: -0.11503145065572527
          total_loss: -0.1176557310960359
          vf_explained_var: 0.6950454115867615
          vf_loss: 0.007924920801694194
    num_agent_steps_sampled: 905000
    num_agent_steps_trained: 905000
    num_steps_sampled: 905000
    num_steps_trained: 90500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,905,23284.7,905000,-2.9019,-2.3,-3.52,290.19


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 906000
  custom_metrics: {}
  date: 2021-11-05_19-00-45
  done: false
  episode_len_mean: 289.37
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.8936999999999826
  episode_reward_min: -3.519999999999969
  episodes_this_iter: 4
  episodes_total: 3377
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5731102182593037
          cur_lr: 5.000000000000001e-05
          entropy: 1.5291385862562392
          entropy_coeff: 0.009999999999999998
          kl: 0.009200326753648153
          policy_loss: -0.03776569652060668
          total_loss: -0.040402908706002764
          vf_explained_var: 0.79143226146698
          vf_loss: 0.007381370730905069
    num_agent_steps_sampled: 906000
    num_agent_steps_trained: 906000
    num_steps_sampled: 906000
    num_steps_trained: 906

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,906,23309,906000,-2.8937,-2.3,-3.52,289.37


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 907000
  custom_metrics: {}
  date: 2021-11-05_19-01-09
  done: false
  episode_len_mean: 287.08
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.8707999999999823
  episode_reward_min: -3.519999999999969
  episodes_this_iter: 4
  episodes_total: 3381
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5731102182593037
          cur_lr: 5.000000000000001e-05
          entropy: 1.5670770168304444
          entropy_coeff: 0.009999999999999998
          kl: 0.007800050312847399
          policy_loss: -0.03341723622547255
          total_loss: -0.03328624392549197
          vf_explained_var: 0.5504723191261292
          vf_loss: 0.011331471179922422
    num_agent_steps_sampled: 907000
    num_agent_steps_trained: 907000
    num_steps_sampled: 907000
    num_steps_trained: 90

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,907,23333.4,907000,-2.8708,-2.3,-3.52,287.08


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 908000
  custom_metrics: {}
  date: 2021-11-05_19-01-34
  done: false
  episode_len_mean: 285.84
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.8583999999999827
  episode_reward_min: -3.519999999999969
  episodes_this_iter: 4
  episodes_total: 3385
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5731102182593037
          cur_lr: 5.000000000000001e-05
          entropy: 1.5718564165963067
          entropy_coeff: 0.009999999999999998
          kl: 0.0072537727416149955
          policy_loss: 0.09912994692309035
          total_loss: 0.09402827821258042
          vf_explained_var: 0.7980976700782776
          vf_loss: 0.006459682160574529
    num_agent_steps_sampled: 908000
    num_agent_steps_trained: 908000
    num_steps_sampled: 908000
    num_steps_trained: 908

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,908,23357.6,908000,-2.8584,-2.3,-3.52,285.84


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 909000
  custom_metrics: {}
  date: 2021-11-05_19-01-59
  done: false
  episode_len_mean: 282.65
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -2.826499999999983
  episode_reward_min: -3.519999999999969
  episodes_this_iter: 4
  episodes_total: 3389
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5731102182593037
          cur_lr: 5.000000000000001e-05
          entropy: 1.738080041938358
          entropy_coeff: 0.009999999999999998
          kl: 0.008401962913506762
          policy_loss: 0.044679259260495506
          total_loss: 0.04101758119132784
          vf_explained_var: 0.5561564564704895
          vf_loss: 0.008903869308738245
    num_agent_steps_sampled: 909000
    num_agent_steps_trained: 909000
    num_steps_sampled: 909000
    num_steps_trained: 90900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,909,23383.2,909000,-2.8265,-2.3,-3.52,282.65




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 910000
  custom_metrics: {}
  date: 2021-11-05_19-02-39
  done: false
  episode_len_mean: 280.8
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.8079999999999843
  episode_reward_min: -3.519999999999969
  episodes_this_iter: 4
  episodes_total: 3393
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5731102182593037
          cur_lr: 5.000000000000001e-05
          entropy: 1.5649799399905735
          entropy_coeff: 0.009999999999999998
          kl: 0.013942470654877562
          policy_loss: -0.0505291642414199
          total_loss: -0.048359238687488766
          vf_explained_var: 0.6541594862937927
          vf_loss: 0.009829156408603821
    num_agent_steps_sampled: 910000
    num_agent_steps_trained: 910000
    num_steps_sampled: 910000
    num_steps_trained: 910

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,910,23422.9,910000,-2.808,-2.29,-3.52,280.8


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 911000
  custom_metrics: {}
  date: 2021-11-05_19-03-02
  done: false
  episode_len_mean: 278.99
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.7898999999999843
  episode_reward_min: -3.4399999999999706
  episodes_this_iter: 3
  episodes_total: 3396
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5731102182593037
          cur_lr: 5.000000000000001e-05
          entropy: 1.613353685537974
          entropy_coeff: 0.009999999999999998
          kl: 0.012046788944172269
          policy_loss: -0.03882223417361577
          total_loss: -0.04033584106299612
          vf_explained_var: 0.6330908536911011
          vf_loss: 0.00771579278839959
    num_agent_steps_sampled: 911000
    num_agent_steps_trained: 911000
    num_steps_sampled: 911000
    num_steps_trained: 911

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,911,23446.2,911000,-2.7899,-2.29,-3.44,278.99


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 912000
  custom_metrics: {}
  date: 2021-11-05_19-03-28
  done: false
  episode_len_mean: 275.54
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.7553999999999848
  episode_reward_min: -3.4399999999999706
  episodes_this_iter: 5
  episodes_total: 3401
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5731102182593037
          cur_lr: 5.000000000000001e-05
          entropy: 1.7128514726956685
          entropy_coeff: 0.009999999999999998
          kl: 0.005554657119125118
          policy_loss: -0.011197525262832641
          total_loss: -0.013731626090076235
          vf_explained_var: 0.3584859073162079
          vf_loss: 0.011410982275588646
    num_agent_steps_sampled: 912000
    num_agent_steps_trained: 912000
    num_steps_sampled: 912000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,912,23471.8,912000,-2.7554,-2.29,-3.44,275.54


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 913000
  custom_metrics: {}
  date: 2021-11-05_19-03-52
  done: false
  episode_len_mean: 274.45
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.7444999999999853
  episode_reward_min: -3.4399999999999706
  episodes_this_iter: 3
  episodes_total: 3404
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5731102182593037
          cur_lr: 5.000000000000001e-05
          entropy: 1.5118827886051602
          entropy_coeff: 0.009999999999999998
          kl: 0.01784386501117125
          policy_loss: -0.032287738886144425
          total_loss: -0.03238234462009536
          vf_explained_var: 0.8241007328033447
          vf_loss: 0.004797717610684534
    num_agent_steps_sampled: 913000
    num_agent_steps_trained: 913000
    num_steps_sampled: 913000
    num_steps_trained: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,913,23495.5,913000,-2.7445,-2.29,-3.44,274.45


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 914000
  custom_metrics: {}
  date: 2021-11-05_19-04-11
  done: false
  episode_len_mean: 276.43
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.7642999999999853
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 3
  episodes_total: 3407
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5731102182593037
          cur_lr: 5.000000000000001e-05
          entropy: 1.6250905765427484
          entropy_coeff: 0.009999999999999998
          kl: 0.014899016432986025
          policy_loss: 0.03040298240052329
          total_loss: 0.029014034652047688
          vf_explained_var: 0.3319796323776245
          vf_loss: 0.006323178767019676
    num_agent_steps_sampled: 914000
    num_agent_steps_trained: 914000
    num_steps_sampled: 914000
    num_steps_trained: 914

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,914,23515.2,914000,-2.7643,-2.29,-3.9,276.43


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 915000
  custom_metrics: {}
  date: 2021-11-05_19-04-32
  done: false
  episode_len_mean: 277.72
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.777199999999985
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 3
  episodes_total: 3410
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5731102182593037
          cur_lr: 5.000000000000001e-05
          entropy: 1.5283073253101773
          entropy_coeff: 0.009999999999999998
          kl: 0.008949263354244922
          policy_loss: -0.13774317560924423
          total_loss: -0.13932635552353329
          vf_explained_var: 0.18190720677375793
          vf_loss: 0.008570979033700294
    num_agent_steps_sampled: 915000
    num_agent_steps_trained: 915000
    num_steps_sampled: 915000
    num_steps_trained: 91

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,915,23535.8,915000,-2.7772,-2.29,-3.9,277.72


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 916000
  custom_metrics: {}
  date: 2021-11-05_19-04-53
  done: false
  episode_len_mean: 278.77
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.787699999999984
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 4
  episodes_total: 3414
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5731102182593037
          cur_lr: 5.000000000000001e-05
          entropy: 1.6057142323917812
          entropy_coeff: 0.009999999999999998
          kl: 0.010560434186513371
          policy_loss: 0.029236018243763182
          total_loss: 0.02662062429719501
          vf_explained_var: 0.22939537465572357
          vf_loss: 0.007389454672940903
    num_agent_steps_sampled: 916000
    num_agent_steps_trained: 916000
    num_steps_sampled: 916000
    num_steps_trained: 916

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,916,23557,916000,-2.7877,-2.29,-3.9,278.77


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 917000
  custom_metrics: {}
  date: 2021-11-05_19-05-17
  done: false
  episode_len_mean: 278.92
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.789199999999984
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 3
  episodes_total: 3417
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5731102182593037
          cur_lr: 5.000000000000001e-05
          entropy: 1.6533608661757575
          entropy_coeff: 0.009999999999999998
          kl: 0.008831177768305477
          policy_loss: 0.02873706403705809
          total_loss: 0.024585039582517412
          vf_explained_var: -0.23811331391334534
          vf_loss: 0.00732034265400014
    num_agent_steps_sampled: 917000
    num_agent_steps_trained: 917000
    num_steps_sampled: 917000
    num_steps_trained: 917

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,917,23581,917000,-2.7892,-2.29,-3.9,278.92




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 918000
  custom_metrics: {}
  date: 2021-11-05_19-05-54
  done: false
  episode_len_mean: 279.72
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.797199999999984
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 3
  episodes_total: 3420
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5731102182593037
          cur_lr: 5.000000000000001e-05
          entropy: 1.6411013417773777
          entropy_coeff: 0.009999999999999998
          kl: 0.006361058301733636
          policy_loss: -0.1261838946905401
          total_loss: -0.13036085458265412
          vf_explained_var: 0.286785751581192
          vf_loss: 0.008588472430387305
    num_agent_steps_sampled: 918000
    num_agent_steps_trained: 918000
    num_steps_sampled: 918000
    num_steps_trained: 91800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,918,23617.7,918000,-2.7972,-2.29,-3.9,279.72


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 919000
  custom_metrics: {}
  date: 2021-11-05_19-06-18
  done: false
  episode_len_mean: 279.34
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.7933999999999846
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 4
  episodes_total: 3424
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5731102182593037
          cur_lr: 5.000000000000001e-05
          entropy: 1.5653017905023363
          entropy_coeff: 0.009999999999999998
          kl: 0.009471719300110908
          policy_loss: -0.0044835226403342355
          total_loss: -0.005831625560919444
          vf_explained_var: 0.3803054690361023
          vf_loss: 0.008876574375770158
    num_agent_steps_sampled: 919000
    num_agent_steps_trained: 919000
    num_steps_sampled: 919000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,919,23642,919000,-2.7934,-2.29,-3.9,279.34


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 920000
  custom_metrics: {}
  date: 2021-11-05_19-06-40
  done: false
  episode_len_mean: 278.48
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.7847999999999846
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 3
  episodes_total: 3427
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5731102182593037
          cur_lr: 5.000000000000001e-05
          entropy: 1.5439637502034504
          entropy_coeff: 0.009999999999999998
          kl: 0.011470907748434437
          policy_loss: -0.016192898982101016
          total_loss: -0.01843225939406289
          vf_explained_var: 0.5890349745750427
          vf_loss: 0.006626180190748224
    num_agent_steps_sampled: 920000
    num_agent_steps_trained: 920000
    num_steps_sampled: 920000
    num_steps_trained: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,920,23663.9,920000,-2.7848,-2.29,-3.9,278.48


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 921000
  custom_metrics: {}
  date: 2021-11-05_19-07-00
  done: false
  episode_len_mean: 279.95
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.799499999999984
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 3
  episodes_total: 3430
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5731102182593037
          cur_lr: 5.000000000000001e-05
          entropy: 1.570217416021559
          entropy_coeff: 0.009999999999999998
          kl: 0.010425863504876334
          policy_loss: 0.07004567061861357
          total_loss: 0.06635925182037883
          vf_explained_var: 0.4310818910598755
          vf_loss: 0.006040588073018525
    num_agent_steps_sampled: 921000
    num_agent_steps_trained: 921000
    num_steps_sampled: 921000
    num_steps_trained: 921000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,921,23683.4,921000,-2.7995,-2.29,-3.9,279.95


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 922000
  custom_metrics: {}
  date: 2021-11-05_19-07-22
  done: false
  episode_len_mean: 280.5
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.8049999999999837
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 3
  episodes_total: 3433
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5731102182593037
          cur_lr: 5.000000000000001e-05
          entropy: 1.553113149272071
          entropy_coeff: 0.009999999999999998
          kl: 0.007552752381320834
          policy_loss: -0.054297016643815574
          total_loss: -0.05591431516740057
          vf_explained_var: 0.6259492039680481
          vf_loss: 0.009585274435165856
    num_agent_steps_sampled: 922000
    num_agent_steps_trained: 922000
    num_steps_sampled: 922000
    num_steps_trained: 922

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,922,23706,922000,-2.805,-2.29,-3.9,280.5


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 923000
  custom_metrics: {}
  date: 2021-11-05_19-07-47
  done: false
  episode_len_mean: 279.26
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.792599999999984
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 4
  episodes_total: 3437
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5731102182593037
          cur_lr: 5.000000000000001e-05
          entropy: 1.2362983663876852
          entropy_coeff: 0.009999999999999998
          kl: 0.006687045062400464
          policy_loss: -0.07053937729862002
          total_loss: -0.07274722986751132
          vf_explained_var: 0.7100167870521545
          vf_loss: 0.006322721164259646
    num_agent_steps_sampled: 923000
    num_agent_steps_trained: 923000
    num_steps_sampled: 923000
    num_steps_trained: 923

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,923,23730.5,923000,-2.7926,-2.29,-3.9,279.26


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 924000
  custom_metrics: {}
  date: 2021-11-05_19-08-11
  done: false
  episode_len_mean: 278.77
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.7876999999999845
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 4
  episodes_total: 3441
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5731102182593037
          cur_lr: 5.000000000000001e-05
          entropy: 1.1533294214142693
          entropy_coeff: 0.009999999999999998
          kl: 0.003524860398282807
          policy_loss: -0.0015545146332846748
          total_loss: -0.005079114105966356
          vf_explained_var: 0.680351197719574
          vf_loss: 0.005988559054417743
    num_agent_steps_sampled: 924000
    num_agent_steps_trained: 924000
    num_steps_sampled: 924000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,924,23754.5,924000,-2.7877,-2.29,-3.9,278.77


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 925000
  custom_metrics: {}
  date: 2021-11-05_19-08-35
  done: false
  episode_len_mean: 277.18
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.7717999999999847
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 4
  episodes_total: 3445
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2865551091296519
          cur_lr: 5.000000000000001e-05
          entropy: 1.3049950453970167
          entropy_coeff: 0.009999999999999998
          kl: 0.007353216645759789
          policy_loss: 0.023990620258781646
          total_loss: 0.016550518944859503
          vf_explained_var: 0.8472119569778442
          vf_loss: 0.0035027485030392808
    num_agent_steps_sampled: 925000
    num_agent_steps_trained: 925000
    num_steps_sampled: 925000
    num_steps_trained: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,925,23778.8,925000,-2.7718,-2.29,-3.9,277.18


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 926000
  custom_metrics: {}
  date: 2021-11-05_19-08-58
  done: false
  episode_len_mean: 277.43
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.7742999999999842
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 3
  episodes_total: 3448
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2865551091296519
          cur_lr: 5.000000000000001e-05
          entropy: 1.5127794795566134
          entropy_coeff: 0.009999999999999998
          kl: 0.01390926069438652
          policy_loss: -0.025486734012762705
          total_loss: -0.024946963580118286
          vf_explained_var: 0.44711753726005554
          vf_loss: 0.011681796165390147
    num_agent_steps_sampled: 926000
    num_agent_steps_trained: 926000
    num_steps_sampled: 926000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,926,23801.6,926000,-2.7743,-2.29,-3.9,277.43




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 927000
  custom_metrics: {}
  date: 2021-11-05_19-09-38
  done: false
  episode_len_mean: 277.27
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.7726999999999857
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 4
  episodes_total: 3452
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2865551091296519
          cur_lr: 5.000000000000001e-05
          entropy: 1.5427599721484715
          entropy_coeff: 0.009999999999999998
          kl: 0.012439556486812837
          policy_loss: 0.060188936938842134
          total_loss: 0.05651517783602079
          vf_explained_var: 0.6213968396186829
          vf_loss: 0.00818922092568957
    num_agent_steps_sampled: 927000
    num_agent_steps_trained: 927000
    num_steps_sampled: 927000
    num_steps_trained: 9270

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,927,23841.6,927000,-2.7727,-2.29,-3.9,277.27


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 928000
  custom_metrics: {}
  date: 2021-11-05_19-10-00
  done: false
  episode_len_mean: 278.4
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.7839999999999843
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 3
  episodes_total: 3455
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2865551091296519
          cur_lr: 5.000000000000001e-05
          entropy: 1.6423409991794162
          entropy_coeff: 0.009999999999999998
          kl: 0.013197896893121532
          policy_loss: -0.11110409419569704
          total_loss: -0.11270479833086332
          vf_explained_var: 0.37011268734931946
          vf_loss: 0.01104078138143652
    num_agent_steps_sampled: 928000
    num_agent_steps_trained: 928000
    num_steps_sampled: 928000
    num_steps_trained: 928

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,928,23863.4,928000,-2.784,-2.29,-3.9,278.4


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 929000
  custom_metrics: {}
  date: 2021-11-05_19-10-21
  done: false
  episode_len_mean: 278.59
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.785899999999984
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 4
  episodes_total: 3459
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2865551091296519
          cur_lr: 5.000000000000001e-05
          entropy: 1.5957934604750739
          entropy_coeff: 0.009999999999999998
          kl: 0.014252765692160215
          policy_loss: -0.020168911500109566
          total_loss: -0.01948711048397753
          vf_explained_var: 0.1931900829076767
          vf_loss: 0.0125555327679548
    num_agent_steps_sampled: 929000
    num_agent_steps_trained: 929000
    num_steps_sampled: 929000
    num_steps_trained: 9290

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,929,23884.9,929000,-2.7859,-2.29,-3.9,278.59


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 930000
  custom_metrics: {}
  date: 2021-11-05_19-10-44
  done: false
  episode_len_mean: 279.41
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.7940999999999843
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 3
  episodes_total: 3462
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2865551091296519
          cur_lr: 5.000000000000001e-05
          entropy: 1.5828925251960755
          entropy_coeff: 0.009999999999999998
          kl: 0.0073093686417776225
          policy_loss: 0.04075651715199153
          total_loss: 0.03470960176653332
          vf_explained_var: 0.3776920735836029
          vf_loss: 0.007687471930532612
    num_agent_steps_sampled: 930000
    num_agent_steps_trained: 930000
    num_steps_sampled: 930000
    num_steps_trained: 930

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,930,23907,930000,-2.7941,-2.29,-3.9,279.41


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 931000
  custom_metrics: {}
  date: 2021-11-05_19-11-05
  done: false
  episode_len_mean: 280.84
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.8083999999999842
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 4
  episodes_total: 3466
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2865551091296519
          cur_lr: 5.000000000000001e-05
          entropy: 1.5368523571226331
          entropy_coeff: 0.009999999999999998
          kl: 0.012312934860385856
          policy_loss: 0.04349240362644195
          total_loss: 0.04130633407168918
          vf_explained_var: 0.4703613221645355
          vf_loss: 0.009654119352085723
    num_agent_steps_sampled: 931000
    num_agent_steps_trained: 931000
    num_steps_sampled: 931000
    num_steps_trained: 9310

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,931,23928.4,931000,-2.8084,-2.29,-3.9,280.84


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 932000
  custom_metrics: {}
  date: 2021-11-05_19-11-26
  done: false
  episode_len_mean: 282.0
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.819999999999983
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 3
  episodes_total: 3469
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2865551091296519
          cur_lr: 5.000000000000001e-05
          entropy: 1.6055630524953206
          entropy_coeff: 0.009999999999999998
          kl: 0.011271012792949817
          policy_loss: 0.05609408285882738
          total_loss: 0.05452886703941557
          vf_explained_var: 0.2795233428478241
          vf_loss: 0.011260648402902815
    num_agent_steps_sampled: 932000
    num_agent_steps_trained: 932000
    num_steps_sampled: 932000
    num_steps_trained: 932000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,932,23949.8,932000,-2.82,-2.29,-3.9,282


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 933000
  custom_metrics: {}
  date: 2021-11-05_19-11-48
  done: false
  episode_len_mean: 282.97
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.8296999999999826
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 3
  episodes_total: 3472
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2865551091296519
          cur_lr: 5.000000000000001e-05
          entropy: 1.6415829247898526
          entropy_coeff: 0.009999999999999998
          kl: 0.010486028417798506
          policy_loss: 0.005018663240803613
          total_loss: 0.0015798286431365543
          vf_explained_var: 0.3920888602733612
          vf_loss: 0.009972168958564806
    num_agent_steps_sampled: 933000
    num_agent_steps_trained: 933000
    num_steps_sampled: 933000
    num_steps_trained: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,933,23971.5,933000,-2.8297,-2.29,-3.9,282.97


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 934000
  custom_metrics: {}
  date: 2021-11-05_19-12-12
  done: false
  episode_len_mean: 283.76
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.8375999999999832
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 4
  episodes_total: 3476
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2865551091296519
          cur_lr: 5.000000000000001e-05
          entropy: 1.4163504242897034
          entropy_coeff: 0.009999999999999998
          kl: 0.009668750884252357
          policy_loss: 0.031796304219298896
          total_loss: 0.03395641106698248
          vf_explained_var: 0.17308928072452545
          vf_loss: 0.013552980973488754
    num_agent_steps_sampled: 934000
    num_agent_steps_trained: 934000
    num_steps_sampled: 934000
    num_steps_trained: 93

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,934,23994.8,934000,-2.8376,-2.29,-3.9,283.76




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 935000
  custom_metrics: {}
  date: 2021-11-05_19-12-51
  done: false
  episode_len_mean: 284.07
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.8406999999999827
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 4
  episodes_total: 3480
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2865551091296519
          cur_lr: 5.000000000000001e-05
          entropy: 1.4201867871814304
          entropy_coeff: 0.009999999999999998
          kl: 0.010719841063680255
          policy_loss: -0.024615389936500125
          total_loss: -0.02473028666443295
          vf_explained_var: 0.42733320593833923
          vf_loss: 0.011015145304716296
    num_agent_steps_sampled: 935000
    num_agent_steps_trained: 935000
    num_steps_sampled: 935000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,935,24034.2,935000,-2.8407,-2.29,-3.9,284.07


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 936000
  custom_metrics: {}
  date: 2021-11-05_19-13-15
  done: false
  episode_len_mean: 284.9
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.848999999999983
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 3
  episodes_total: 3483
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2865551091296519
          cur_lr: 5.000000000000001e-05
          entropy: 1.5344769928190443
          entropy_coeff: 0.009999999999999998
          kl: 0.020103678046710823
          policy_loss: -0.017660671638117895
          total_loss: -0.017468776139948104
          vf_explained_var: -0.16033688187599182
          vf_loss: 0.009775851167634957
    num_agent_steps_sampled: 936000
    num_agent_steps_trained: 936000
    num_steps_sampled: 936000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,936,24058.4,936000,-2.849,-2.29,-3.9,284.9


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 937000
  custom_metrics: {}
  date: 2021-11-05_19-13-39
  done: false
  episode_len_mean: 285.57
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.855699999999983
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 4
  episodes_total: 3487
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.3629846215248107
          entropy_coeff: 0.009999999999999998
          kl: 0.006328096305436305
          policy_loss: 0.028999360733562046
          total_loss: 0.029886558320787217
          vf_explained_var: 0.15238742530345917
          vf_loss: 0.011797023311050401
    num_agent_steps_sampled: 937000
    num_agent_steps_trained: 937000
    num_steps_sampled: 937000
    num_steps_trained: 93

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,937,24081.7,937000,-2.8557,-2.29,-3.9,285.57


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 938000
  custom_metrics: {}
  date: 2021-11-05_19-14-00
  done: false
  episode_len_mean: 287.68
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -2.876799999999982
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 3
  episodes_total: 3490
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.5533919639057583
          entropy_coeff: 0.009999999999999998
          kl: 0.008000682111395128
          policy_loss: 0.009631016022629208
          total_loss: 0.0038384481022755304
          vf_explained_var: 0.6172929406166077
          vf_loss: 0.006302395293540839
    num_agent_steps_sampled: 938000
    num_agent_steps_trained: 938000
    num_steps_sampled: 938000
    num_steps_trained: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,938,24103.3,938000,-2.8768,-2.36,-3.9,287.68


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 939000
  custom_metrics: {}
  date: 2021-11-05_19-14-22
  done: false
  episode_len_mean: 287.81
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -2.878099999999983
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 3
  episodes_total: 3493
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.4543847110536363
          entropy_coeff: 0.009999999999999998
          kl: 0.010624702013827737
          policy_loss: -0.06354934407605065
          total_loss: -0.06532911062240601
          vf_explained_var: 0.5578847527503967
          vf_loss: 0.008197233086037967
    num_agent_steps_sampled: 939000
    num_agent_steps_trained: 939000
    num_steps_sampled: 939000
    num_steps_trained: 93

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,939,24124.9,939000,-2.8781,-2.36,-3.9,287.81


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 940000
  custom_metrics: {}
  date: 2021-11-05_19-14-43
  done: false
  episode_len_mean: 288.76
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -2.8875999999999817
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 4
  episodes_total: 3497
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.3962440384758843
          entropy_coeff: 0.009999999999999998
          kl: 0.010076510688557149
          policy_loss: -0.001731609304745992
          total_loss: 0.0006641195052199893
          vf_explained_var: 0.39514774084091187
          vf_loss: 0.012026953293631475
    num_agent_steps_sampled: 940000
    num_agent_steps_trained: 940000
    num_steps_sampled: 940000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,940,24146.5,940000,-2.8876,-2.36,-3.9,288.76


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 941000
  custom_metrics: {}
  date: 2021-11-05_19-15-06
  done: false
  episode_len_mean: 290.88
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -2.9087999999999816
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 3
  episodes_total: 3500
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.4078401221169365
          entropy_coeff: 0.009999999999999998
          kl: 0.00977268229481341
          policy_loss: 0.030355383538537554
          total_loss: 0.02852848991751671
          vf_explained_var: 0.4799918234348297
          vf_loss: 0.008050888652602832
    num_agent_steps_sampled: 941000
    num_agent_steps_trained: 941000
    num_steps_sampled: 941000
    num_steps_trained: 941

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,941,24168.9,941000,-2.9088,-2.38,-3.9,290.88


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 942000
  custom_metrics: {}
  date: 2021-11-05_19-15-26
  done: false
  episode_len_mean: 292.36
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -2.9235999999999813
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 3
  episodes_total: 3503
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.505527377128601
          entropy_coeff: 0.009999999999999998
          kl: 0.009377032937450794
          policy_loss: -0.08383131846785545
          total_loss: -0.07920844289991591
          vf_explained_var: 0.1114891767501831
          vf_loss: 0.015647594879070918
    num_agent_steps_sampled: 942000
    num_agent_steps_trained: 942000
    num_steps_sampled: 942000
    num_steps_trained: 94

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,942,24189.5,942000,-2.9236,-2.38,-3.9,292.36


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 943000
  custom_metrics: {}
  date: 2021-11-05_19-15-49
  done: false
  episode_len_mean: 291.25
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -2.9124999999999814
  episode_reward_min: -3.889999999999961
  episodes_this_iter: 4
  episodes_total: 3507
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.298808397187127
          entropy_coeff: 0.009999999999999998
          kl: 0.007803200503672435
          policy_loss: 0.000738817122247484
          total_loss: 0.004849138855934143
          vf_explained_var: 0.3148742914199829
          vf_loss: 0.01374433453505238
    num_agent_steps_sampled: 943000
    num_agent_steps_trained: 943000
    num_steps_sampled: 943000
    num_steps_trained: 943

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,943,24211.9,943000,-2.9125,-2.38,-3.89,291.25




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 944000
  custom_metrics: {}
  date: 2021-11-05_19-16-32
  done: false
  episode_len_mean: 288.99
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.8898999999999826
  episode_reward_min: -3.889999999999961
  episodes_this_iter: 4
  episodes_total: 3511
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 0.8685705085595449
          entropy_coeff: 0.009999999999999998
          kl: 0.005099496723733271
          policy_loss: 0.07638422118292915
          total_loss: 0.07944196230835385
          vf_explained_var: 0.1932654082775116
          vf_loss: 0.009551516496058968
    num_agent_steps_sampled: 944000
    num_agent_steps_trained: 944000
    num_steps_sampled: 944000
    num_steps_trained: 944

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,944,24254.6,944000,-2.8899,-1.97,-3.89,288.99


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 945000
  custom_metrics: {}
  date: 2021-11-05_19-16-55
  done: false
  episode_len_mean: 287.98
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.879799999999982
  episode_reward_min: -3.889999999999961
  episodes_this_iter: 3
  episodes_total: 3514
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.146059180630578
          entropy_coeff: 0.009999999999999998
          kl: 0.010029657582363979
          policy_loss: -0.11327093334661589
          total_loss: -0.10973292663693428
          vf_explained_var: 0.2469417005777359
          vf_loss: 0.010687524121668604
    num_agent_steps_sampled: 945000
    num_agent_steps_trained: 945000
    num_steps_sampled: 945000
    num_steps_trained: 945

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,945,24278.2,945000,-2.8798,-1.97,-3.89,287.98


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 946000
  custom_metrics: {}
  date: 2021-11-05_19-17-20
  done: false
  episode_len_mean: 285.61
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.856099999999983
  episode_reward_min: -3.889999999999961
  episodes_this_iter: 4
  episodes_total: 3518
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 0.7750212954150306
          entropy_coeff: 0.009999999999999998
          kl: 0.008837095829955012
          policy_loss: -0.11246083511246575
          total_loss: -0.1073728655775388
          vf_explained_var: 0.509806215763092
          vf_loss: 0.009039707916478316
    num_agent_steps_sampled: 946000
    num_agent_steps_trained: 946000
    num_steps_sampled: 946000
    num_steps_trained: 9460

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,946,24303,946000,-2.8561,-1.97,-3.89,285.61


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 947000
  custom_metrics: {}
  date: 2021-11-05_19-17-45
  done: false
  episode_len_mean: 283.08
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.8307999999999836
  episode_reward_min: -3.889999999999961
  episodes_this_iter: 5
  episodes_total: 3523
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.0149278395705752
          entropy_coeff: 0.009999999999999998
          kl: 0.009678716739971567
          policy_loss: -0.04229498977462451
          total_loss: -0.0356296052535375
          vf_explained_var: 0.3908337354660034
          vf_loss: 0.012654439002896348
    num_agent_steps_sampled: 947000
    num_agent_steps_trained: 947000
    num_steps_sampled: 947000
    num_steps_trained: 94

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,947,24328.3,947000,-2.8308,-1.97,-3.89,283.08


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 948000
  custom_metrics: {}
  date: 2021-11-05_19-18-10
  done: false
  episode_len_mean: 282.22
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.8221999999999836
  episode_reward_min: -3.889999999999961
  episodes_this_iter: 3
  episodes_total: 3526
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.1501939800050525
          entropy_coeff: 0.009999999999999998
          kl: 0.013092155340270948
          policy_loss: -0.11768996309902933
          total_loss: -0.11454165072904693
          vf_explained_var: 0.48642057180404663
          vf_loss: 0.009022814242376222
    num_agent_steps_sampled: 948000
    num_agent_steps_trained: 948000
    num_steps_sampled: 948000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,948,24352.7,948000,-2.8222,-1.97,-3.89,282.22


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 949000
  custom_metrics: {}
  date: 2021-11-05_19-18-34
  done: false
  episode_len_mean: 279.57
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.7956999999999845
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 4
  episodes_total: 3530
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.182677663034863
          entropy_coeff: 0.009999999999999998
          kl: 0.012043172468094178
          policy_loss: 0.011154281182421578
          total_loss: 0.01490236222743988
          vf_explained_var: 0.48901811242103577
          vf_loss: 0.010398308280855417
    num_agent_steps_sampled: 949000
    num_agent_steps_trained: 949000
    num_steps_sampled: 949000
    num_steps_trained: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,949,24376.8,949000,-2.7957,-1.97,-3.78,279.57


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 950000
  custom_metrics: {}
  date: 2021-11-05_19-18-57
  done: false
  episode_len_mean: 278.72
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.7871999999999844
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 4
  episodes_total: 3534
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.25337696340349
          entropy_coeff: 0.009999999999999998
          kl: 0.00811669203370084
          policy_loss: -0.015758565316597622
          total_loss: -0.012657152530219819
          vf_explained_var: 0.1601497381925583
          vf_loss: 0.012146363748858373
    num_agent_steps_sampled: 950000
    num_agent_steps_trained: 950000
    num_steps_sampled: 950000
    num_steps_trained: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,950,24399.8,950000,-2.7872,-1.97,-3.78,278.72


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 951000
  custom_metrics: {}
  date: 2021-11-05_19-19-20
  done: false
  episode_len_mean: 279.53
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.7952999999999846
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 3
  episodes_total: 3537
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.3561854018105401
          entropy_coeff: 0.009999999999999998
          kl: 0.009987877055099799
          policy_loss: -0.1251639516817199
          total_loss: -0.12188840582966805
          vf_explained_var: 0.15402840077877045
          vf_loss: 0.012544283607146807
    num_agent_steps_sampled: 951000
    num_agent_steps_trained: 951000
    num_steps_sampled: 951000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,951,24422.9,951000,-2.7953,-1.97,-3.78,279.53




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 952000
  custom_metrics: {}
  date: 2021-11-05_19-20-02
  done: false
  episode_len_mean: 279.37
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.7936999999999843
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 4
  episodes_total: 3541
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.316424392329322
          entropy_coeff: 0.009999999999999998
          kl: 0.008665428878478012
          policy_loss: -0.11489692893293169
          total_loss: -0.1129970368825727
          vf_explained_var: 0.34572649002075195
          vf_loss: 0.011339448609699805
    num_agent_steps_sampled: 952000
    num_agent_steps_trained: 952000
    num_steps_sampled: 952000
    num_steps_trained: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,952,24464.6,952000,-2.7937,-1.97,-3.78,279.37


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 953000
  custom_metrics: {}
  date: 2021-11-05_19-20-26
  done: false
  episode_len_mean: 278.76
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.787599999999985
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 4
  episodes_total: 3545
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.2782583170466952
          entropy_coeff: 0.009999999999999998
          kl: 0.006459782952559263
          policy_loss: -0.1018274962902069
          total_loss: -0.10150887535678016
          vf_explained_var: 0.4243125915527344
          vf_loss: 0.01032457830539594
    num_agent_steps_sampled: 953000
    num_agent_steps_trained: 953000
    num_steps_sampled: 953000
    num_steps_trained: 953

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,953,24488.7,953000,-2.7876,-1.97,-3.78,278.76


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 954000
  custom_metrics: {}
  date: 2021-11-05_19-20-50
  done: false
  episode_len_mean: 278.27
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.7826999999999846
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 4
  episodes_total: 3549
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.3289189842012195
          entropy_coeff: 0.009999999999999998
          kl: 0.01128981248089226
          policy_loss: 0.013640548123253717
          total_loss: 0.01187003172106213
          vf_explained_var: 0.6467753648757935
          vf_loss: 0.006665944561569227
    num_agent_steps_sampled: 954000
    num_agent_steps_trained: 954000
    num_steps_sampled: 954000
    num_steps_trained: 95

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,954,24512.2,954000,-2.7827,-1.97,-3.78,278.27


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 955000
  custom_metrics: {}
  date: 2021-11-05_19-21-15
  done: false
  episode_len_mean: 276.8
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.7679999999999843
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 4
  episodes_total: 3553
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.3270298692915174
          entropy_coeff: 0.009999999999999998
          kl: 0.008023129463845495
          policy_loss: 0.004648042139079836
          total_loss: 0.002149028993315167
          vf_explained_var: 0.5384940505027771
          vf_loss: 0.0073226853480769525
    num_agent_steps_sampled: 955000
    num_agent_steps_trained: 955000
    num_steps_sampled: 955000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,955,24537.6,955000,-2.768,-1.97,-3.78,276.8


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 956000
  custom_metrics: {}
  date: 2021-11-05_19-21-41
  done: false
  episode_len_mean: 274.79
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.7478999999999854
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 4
  episodes_total: 3557
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.227678894996643
          entropy_coeff: 0.009999999999999998
          kl: 0.007637854485066084
          policy_loss: -0.06712759273747602
          total_loss: -0.06790345700250731
          vf_explained_var: 0.5772749781608582
          vf_loss: 0.008217927353042696
    num_agent_steps_sampled: 956000
    num_agent_steps_trained: 956000
    num_steps_sampled: 956000
    num_steps_trained: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,956,24563.2,956000,-2.7479,-1.97,-3.78,274.79


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 957000
  custom_metrics: {}
  date: 2021-11-05_19-22-05
  done: false
  episode_len_mean: 273.55
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.7354999999999854
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 4
  episodes_total: 3561
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.395269939634535
          entropy_coeff: 0.009999999999999998
          kl: 0.008133957461712388
          policy_loss: -0.02417424428794119
          total_loss: -0.024763261857959958
          vf_explained_var: 0.5534118413925171
          vf_loss: 0.00986744103849762
    num_agent_steps_sampled: 957000
    num_agent_steps_trained: 957000
    num_steps_sampled: 957000
    num_steps_trained: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,957,24587.3,957000,-2.7355,-1.97,-3.78,273.55


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 958000
  custom_metrics: {}
  date: 2021-11-05_19-22-29
  done: false
  episode_len_mean: 272.53
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.7252999999999847
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 4
  episodes_total: 3565
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.2807930999332005
          entropy_coeff: 0.009999999999999998
          kl: 0.006109363067533153
          policy_loss: -0.02191225729054875
          total_loss: -0.021354134960307015
          vf_explained_var: 0.476596862077713
          vf_loss: 0.010740050078473158
    num_agent_steps_sampled: 958000
    num_agent_steps_trained: 958000
    num_steps_sampled: 958000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,958,24611.9,958000,-2.7253,-1.97,-3.78,272.53


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 959000
  custom_metrics: {}
  date: 2021-11-05_19-22-55
  done: false
  episode_len_mean: 269.65
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.6964999999999857
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 4
  episodes_total: 3569
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.2067687339252895
          entropy_coeff: 0.009999999999999998
          kl: 0.010062500338883435
          policy_loss: -0.004419967904686928
          total_loss: -0.006505465010801951
          vf_explained_var: 0.7352762222290039
          vf_loss: 0.005656993435695768
    num_agent_steps_sampled: 959000
    num_agent_steps_trained: 959000
    num_steps_sampled: 959000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,959,24637.1,959000,-2.6965,-1.97,-3.58,269.65




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 960000
  custom_metrics: {}
  date: 2021-11-05_19-23-36
  done: false
  episode_len_mean: 267.57
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.6756999999999875
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 4
  episodes_total: 3573
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.2720281177096897
          entropy_coeff: 0.009999999999999998
          kl: 0.010267349359755684
          policy_loss: 0.025451659576760398
          total_loss: 0.029152884748246934
          vf_explained_var: 0.308318167924881
          vf_loss: 0.012008263501856062
    num_agent_steps_sampled: 960000
    num_agent_steps_trained: 960000
    num_steps_sampled: 960000
    num_steps_trained: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,960,24678.3,960000,-2.6757,-1.97,-3.58,267.57


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 961000
  custom_metrics: {}
  date: 2021-11-05_19-24-01
  done: false
  episode_len_mean: 267.0
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.669999999999987
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 4
  episodes_total: 3577
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.1364810864130657
          entropy_coeff: 0.009999999999999998
          kl: 0.0069821368796226975
          policy_loss: -0.0010829983486069573
          total_loss: -0.0013654723763465882
          vf_explained_var: 0.7220091819763184
          vf_loss: 0.008081188114980856
    num_agent_steps_sampled: 961000
    num_agent_steps_trained: 961000
    num_steps_sampled: 961000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,961,24703.4,961000,-2.67,-1.97,-3.58,267


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 962000
  custom_metrics: {}
  date: 2021-11-05_19-24-26
  done: false
  episode_len_mean: 265.66
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.656599999999987
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 4
  episodes_total: 3581
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.0737958861721886
          entropy_coeff: 0.009999999999999998
          kl: 0.007642838423500267
          policy_loss: 0.054694137225548424
          total_loss: 0.052823300080166925
          vf_explained_var: 0.8147340416908264
          vf_loss: 0.005581978185930186
    num_agent_steps_sampled: 962000
    num_agent_steps_trained: 962000
    num_steps_sampled: 962000
    num_steps_trained: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,962,24728.7,962000,-2.6566,-1.97,-3.58,265.66


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 963000
  custom_metrics: {}
  date: 2021-11-05_19-24-50
  done: false
  episode_len_mean: 265.32
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.6531999999999862
  episode_reward_min: -3.5799999999999677
  episodes_this_iter: 4
  episodes_total: 3585
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.2321027729246352
          entropy_coeff: 0.009999999999999998
          kl: 0.007192477600041356
          policy_loss: -0.05941231523950895
          total_loss: -0.05598004216121303
          vf_explained_var: 0.3036974370479584
          vf_loss: 0.012661739169723457
    num_agent_steps_sampled: 963000
    num_agent_steps_trained: 963000
    num_steps_sampled: 963000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,963,24752.9,963000,-2.6532,-1.97,-3.58,265.32


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 964000
  custom_metrics: {}
  date: 2021-11-05_19-25-15
  done: false
  episode_len_mean: 263.14
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.6313999999999873
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 3589
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.2820430437723795
          entropy_coeff: 0.009999999999999998
          kl: 0.009261594610930502
          policy_loss: 0.01064683881898721
          total_loss: 0.011031037734614477
          vf_explained_var: 0.560174286365509
          vf_loss: 0.009223693774806129
    num_agent_steps_sampled: 964000
    num_agent_steps_trained: 964000
    num_steps_sampled: 964000
    num_steps_trained: 96

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,964,24777,964000,-2.6314,-1.97,-3.41,263.14


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 965000
  custom_metrics: {}
  date: 2021-11-05_19-25-39
  done: false
  episode_len_mean: 261.94
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.619399999999988
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 3593
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.2081128219763437
          entropy_coeff: 0.009999999999999998
          kl: 0.006691743132861008
          policy_loss: -0.010680048167705536
          total_loss: -0.008075100680192312
          vf_explained_var: 0.3442437946796417
          vf_loss: 0.011809744592756032
    num_agent_steps_sampled: 965000
    num_agent_steps_trained: 965000
    num_steps_sampled: 965000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,965,24801.9,965000,-2.6194,-1.97,-3.41,261.94


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 966000
  custom_metrics: {}
  date: 2021-11-05_19-26-06
  done: false
  episode_len_mean: 259.52
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.595199999999989
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 3597
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 0.963178375032213
          entropy_coeff: 0.009999999999999998
          kl: 0.005382056741125041
          policy_loss: 0.051854692647854486
          total_loss: 0.051320848613977434
          vf_explained_var: 0.5174766182899475
          vf_loss: 0.006784555621238218
    num_agent_steps_sampled: 966000
    num_agent_steps_trained: 966000
    num_steps_sampled: 966000
    num_steps_trained: 96

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,966,24828.3,966000,-2.5952,-1.97,-3.41,259.52




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 967000
  custom_metrics: {}
  date: 2021-11-05_19-26-49
  done: false
  episode_len_mean: 257.21
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.572099999999989
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 3601
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.1306650559107463
          entropy_coeff: 0.009999999999999998
          kl: 0.012162690769019867
          policy_loss: -0.032728372679816355
          total_loss: -0.031759876840644415
          vf_explained_var: 0.7079386115074158
          vf_loss: 0.007047222337375084
    num_agent_steps_sampled: 967000
    num_agent_steps_trained: 967000
    num_steps_sampled: 967000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,967,24871.2,967000,-2.5721,-1.97,-3.41,257.21


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 968000
  custom_metrics: {}
  date: 2021-11-05_19-27-15
  done: false
  episode_len_mean: 254.42
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.54419999999999
  episode_reward_min: -3.3399999999999728
  episodes_this_iter: 4
  episodes_total: 3605
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.0216293460792965
          entropy_coeff: 0.009999999999999998
          kl: 0.0056765659201523195
          policy_loss: 0.019049815585215887
          total_loss: 0.015759655088186265
          vf_explained_var: 0.7927703261375427
          vf_loss: 0.004486158842013942
    num_agent_steps_sampled: 968000
    num_agent_steps_trained: 968000
    num_steps_sampled: 968000
    num_steps_trained: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,968,24896.9,968000,-2.5442,-1.97,-3.34,254.42


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 969000
  custom_metrics: {}
  date: 2021-11-05_19-27-40
  done: false
  episode_len_mean: 253.9
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.53899999999999
  episode_reward_min: -3.1599999999999766
  episodes_this_iter: 5
  episodes_total: 3610
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.00233393907547
          entropy_coeff: 0.009999999999999998
          kl: 0.0073665156515664715
          policy_loss: -0.022627856582403183
          total_loss: -0.023716878394285837
          vf_explained_var: 0.8098015785217285
          vf_loss: 0.0057679485426180895
    num_agent_steps_sampled: 969000
    num_agent_steps_trained: 969000
    num_steps_sampled: 969000
    num_steps_trained: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,969,24922.7,969000,-2.539,-2.03,-3.16,253.9


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 970000
  custom_metrics: {}
  date: 2021-11-05_19-28-06
  done: false
  episode_len_mean: 252.35
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.52349999999999
  episode_reward_min: -3.0199999999999796
  episodes_this_iter: 4
  episodes_total: 3614
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.1690732253922356
          entropy_coeff: 0.009999999999999998
          kl: 0.010645799140114163
          policy_loss: -0.05169308823015955
          total_loss: -0.052265060444672905
          vf_explained_var: 0.8022320866584778
          vf_loss: 0.006542845517914328
    num_agent_steps_sampled: 970000
    num_agent_steps_trained: 970000
    num_steps_sampled: 970000
    num_steps_trained: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,970,24947.8,970000,-2.5235,-2.03,-3.02,252.35


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 971000
  custom_metrics: {}
  date: 2021-11-05_19-28-29
  done: false
  episode_len_mean: 253.03
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.53029999999999
  episode_reward_min: -3.0199999999999796
  episodes_this_iter: 3
  episodes_total: 3617
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.3900443646642897
          entropy_coeff: 0.009999999999999998
          kl: 0.019118932397267308
          policy_loss: -0.11641256991359923
          total_loss: -0.11038496510850059
          vf_explained_var: 0.5678228139877319
          vf_loss: 0.011710109489245547
    num_agent_steps_sampled: 971000
    num_agent_steps_trained: 971000
    num_steps_sampled: 971000
    num_steps_trained: 97

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,971,24971.4,971000,-2.5303,-2.03,-3.02,253.03


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 972000
  custom_metrics: {}
  date: 2021-11-05_19-28-54
  done: false
  episode_len_mean: 253.27
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.5326999999999895
  episode_reward_min: -3.0199999999999796
  episodes_this_iter: 4
  episodes_total: 3621
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.2955936974949307
          entropy_coeff: 0.009999999999999998
          kl: 0.008419015147939765
          policy_loss: 0.029385272040963174
          total_loss: 0.02910297479894426
          vf_explained_var: 0.6006901264190674
          vf_loss: 0.009054871720986233
    num_agent_steps_sampled: 972000
    num_agent_steps_trained: 972000
    num_steps_sampled: 972000
    num_steps_trained: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,972,24995.7,972000,-2.5327,-2.03,-3.02,253.27


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 973000
  custom_metrics: {}
  date: 2021-11-05_19-29-17
  done: false
  episode_len_mean: 253.83
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.53829999999999
  episode_reward_min: -3.0199999999999796
  episodes_this_iter: 4
  episodes_total: 3625
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.2832167519463433
          entropy_coeff: 0.009999999999999998
          kl: 0.011796678329662422
          policy_loss: 0.02061221305694845
          total_loss: 0.02164892264538341
          vf_explained_var: 0.4317980110645294
          vf_loss: 0.00879827671063443
    num_agent_steps_sampled: 973000
    num_agent_steps_trained: 973000
    num_steps_sampled: 973000
    num_steps_trained: 97300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,973,25019.2,973000,-2.5383,-2.03,-3.02,253.83


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 974000
  custom_metrics: {}
  date: 2021-11-05_19-29-40
  done: false
  episode_len_mean: 254.32
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.54319999999999
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 3
  episodes_total: 3628
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.3050231337547302
          entropy_coeff: 0.009999999999999998
          kl: 0.0110505266014587
          policy_loss: -0.1324286647554901
          total_loss: -0.12928373585972522
          vf_explained_var: 0.3985496461391449
          vf_loss: 0.01144528543162677
    num_agent_steps_sampled: 974000
    num_agent_steps_trained: 974000
    num_steps_sampled: 974000
    num_steps_trained: 974000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,974,25041.8,974000,-2.5432,-2.03,-3.1,254.32




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 975000
  custom_metrics: {}
  date: 2021-11-05_19-30-21
  done: false
  episode_len_mean: 254.52
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.5451999999999897
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 4
  episodes_total: 3632
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.1943931500116984
          entropy_coeff: 0.009999999999999998
          kl: 0.008349657713519261
          policy_loss: -0.05343544594943524
          total_loss: -0.04881875423921479
          vf_explained_var: 0.14194269478321075
          vf_loss: 0.012971667691858278
    num_agent_steps_sampled: 975000
    num_agent_steps_trained: 975000
    num_steps_sampled: 975000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,975,25083.3,975000,-2.5452,-2.03,-3.1,254.52


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 976000
  custom_metrics: {}
  date: 2021-11-05_19-30-45
  done: false
  episode_len_mean: 254.29
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.5428999999999893
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 4
  episodes_total: 3636
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.1346282958984375
          entropy_coeff: 0.009999999999999998
          kl: 0.013324333175842796
          policy_loss: -0.022238470365603766
          total_loss: -0.01652132703198327
          vf_explained_var: 0.1297726035118103
          vf_loss: 0.011336195070503487
    num_agent_steps_sampled: 976000
    num_agent_steps_trained: 976000
    num_steps_sampled: 976000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,976,25106.7,976000,-2.5429,-2.03,-3.1,254.29


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 977000
  custom_metrics: {}
  date: 2021-11-05_19-31-09
  done: false
  episode_len_mean: 254.37
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.5436999999999896
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 4
  episodes_total: 3640
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.153079957432217
          entropy_coeff: 0.009999999999999998
          kl: 0.009480724402208275
          policy_loss: 0.002230533709128698
          total_loss: 0.005179029785924488
          vf_explained_var: 0.4136546552181244
          vf_loss: 0.010404169947529832
    num_agent_steps_sampled: 977000
    num_agent_steps_trained: 977000
    num_steps_sampled: 977000
    num_steps_trained: 97

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,977,25131.2,977000,-2.5437,-2.05,-3.1,254.37


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 978000
  custom_metrics: {}
  date: 2021-11-05_19-31-32
  done: false
  episode_len_mean: 254.77
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.547699999999989
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 4
  episodes_total: 3644
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 1.1508940736452737
          entropy_coeff: 0.009999999999999998
          kl: 0.008321086480621728
          policy_loss: -0.003854518797662523
          total_loss: -0.00036263449324501886
          vf_explained_var: 0.16691572964191437
          vf_loss: 0.01142415049382382
    num_agent_steps_sampled: 978000
    num_agent_steps_trained: 978000
    num_steps_sampled: 978000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,978,25153.9,978000,-2.5477,-2.05,-3.1,254.77


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 979000
  custom_metrics: {}
  date: 2021-11-05_19-31-57
  done: false
  episode_len_mean: 254.28
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.542799999999989
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 3
  episodes_total: 3647
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 0.8617871128850513
          entropy_coeff: 0.009999999999999998
          kl: 0.006144698966815317
          policy_loss: -0.0888391103181574
          total_loss: -0.086161796334717
          vf_explained_var: 0.5275818109512329
          vf_loss: 0.00865399083122611
    num_agent_steps_sampled: 979000
    num_agent_steps_trained: 979000
    num_steps_sampled: 979000
    num_steps_trained: 979000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,979,25178.6,979000,-2.5428,-2.05,-3.1,254.28


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 980000
  custom_metrics: {}
  date: 2021-11-05_19-32-20
  done: false
  episode_len_mean: 254.19
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.54189999999999
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 4
  episodes_total: 3651
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4298326636944779
          cur_lr: 5.000000000000001e-05
          entropy: 0.9970610459645589
          entropy_coeff: 0.009999999999999998
          kl: 0.0049214874490770754
          policy_loss: 0.005495404038164351
          total_loss: 0.006577190425660875
          vf_explained_var: 0.36076101660728455
          vf_loss: 0.00893698154638211
    num_agent_steps_sampled: 980000
    num_agent_steps_trained: 980000
    num_steps_sampled: 980000
    num_steps_trained: 98

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,980,25202.4,980000,-2.5419,-2.05,-3.1,254.19


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 981000
  custom_metrics: {}
  date: 2021-11-05_19-32-45
  done: false
  episode_len_mean: 254.72
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.54719999999999
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 4
  episodes_total: 3655
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21491633184723896
          cur_lr: 5.000000000000001e-05
          entropy: 0.867902488178677
          entropy_coeff: 0.009999999999999998
          kl: 0.006496364055787672
          policy_loss: -0.06141353141930368
          total_loss: -0.05677476558420393
          vf_explained_var: 0.1652994304895401
          vf_loss: 0.011921615174247158
    num_agent_steps_sampled: 981000
    num_agent_steps_trained: 981000
    num_steps_sampled: 981000
    num_steps_trained: 981

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,981,25227.1,981000,-2.5472,-2.05,-3.1,254.72


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 982000
  custom_metrics: {}
  date: 2021-11-05_19-33-11
  done: false
  episode_len_mean: 255.02
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.5501999999999896
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 4
  episodes_total: 3659
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21491633184723896
          cur_lr: 5.000000000000001e-05
          entropy: 1.022078549861908
          entropy_coeff: 0.009999999999999998
          kl: 0.010286645421209863
          policy_loss: -0.08583985136614905
          total_loss: -0.0824040765563647
          vf_explained_var: 0.5448604822158813
          vf_loss: 0.01144579329734875
    num_agent_steps_sampled: 982000
    num_agent_steps_trained: 982000
    num_steps_sampled: 982000
    num_steps_trained: 982

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,982,25253,982000,-2.5502,-2.05,-3.1,255.02




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 983000
  custom_metrics: {}
  date: 2021-11-05_19-33-54
  done: false
  episode_len_mean: 253.48
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.5347999999999904
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 5
  episodes_total: 3664
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21491633184723896
          cur_lr: 5.000000000000001e-05
          entropy: 0.5793002777629428
          entropy_coeff: 0.009999999999999998
          kl: 0.006059452959768584
          policy_loss: -0.029278076854017045
          total_loss: -0.023958801312579048
          vf_explained_var: 0.4414450228214264
          vf_loss: 0.00981000135652721
    num_agent_steps_sampled: 983000
    num_agent_steps_trained: 983000
    num_steps_sampled: 983000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,983,25296.2,983000,-2.5348,-1.99,-3.1,253.48


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 984000
  custom_metrics: {}
  date: 2021-11-05_19-34-21
  done: false
  episode_len_mean: 252.55
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.5254999999999903
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 4
  episodes_total: 3668
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21491633184723896
          cur_lr: 5.000000000000001e-05
          entropy: 0.5656674414873123
          entropy_coeff: 0.009999999999999998
          kl: 0.0060957156939107585
          policy_loss: 0.0648808972703086
          total_loss: 0.07088335239224963
          vf_explained_var: 0.5279948711395264
          vf_loss: 0.010349060822692182
    num_agent_steps_sampled: 984000
    num_agent_steps_trained: 984000
    num_steps_sampled: 984000
    num_steps_trained: 98

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,984,25322.9,984000,-2.5255,-1.99,-3.1,252.55


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 985000
  custom_metrics: {}
  date: 2021-11-05_19-34-47
  done: false
  episode_len_mean: 252.29
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.52289999999999
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 4
  episodes_total: 3672
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21491633184723896
          cur_lr: 5.000000000000001e-05
          entropy: 0.7635246213939455
          entropy_coeff: 0.009999999999999998
          kl: 0.014767633340365509
          policy_loss: -0.04349407143890858
          total_loss: -0.03753901691072517
          vf_explained_var: 0.470202773809433
          vf_loss: 0.010416494930783908
    num_agent_steps_sampled: 985000
    num_agent_steps_trained: 985000
    num_steps_sampled: 985000
    num_steps_trained: 985

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,985,25349,985000,-2.5229,-1.99,-3.1,252.29


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 986000
  custom_metrics: {}
  date: 2021-11-05_19-35-12
  done: false
  episode_len_mean: 251.72
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.51719999999999
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 4
  episodes_total: 3676
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21491633184723896
          cur_lr: 5.000000000000001e-05
          entropy: 0.8588229146268632
          entropy_coeff: 0.009999999999999998
          kl: 0.0056448899388780345
          policy_loss: -0.0011330164968967437
          total_loss: -0.0010776053700182174
          vf_explained_var: 0.6168806552886963
          vf_loss: 0.007430461224996381
    num_agent_steps_sampled: 986000
    num_agent_steps_trained: 986000
    num_steps_sampled: 986000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,986,25374,986000,-2.5172,-1.99,-3.1,251.72


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 987000
  custom_metrics: {}
  date: 2021-11-05_19-35-38
  done: false
  episode_len_mean: 251.86
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.51859999999999
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 4
  episodes_total: 3680
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21491633184723896
          cur_lr: 5.000000000000001e-05
          entropy: 0.9231280730830298
          entropy_coeff: 0.009999999999999998
          kl: 0.005176036695485524
          policy_loss: -0.055075136075417204
          total_loss: -0.05787179585960176
          vf_explained_var: 0.7872459292411804
          vf_loss: 0.005322203487675223
    num_agent_steps_sampled: 987000
    num_agent_steps_trained: 987000
    num_steps_sampled: 987000
    num_steps_trained: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,987,25399.7,987000,-2.5186,-1.99,-3.1,251.86


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 988000
  custom_metrics: {}
  date: 2021-11-05_19-36-04
  done: false
  episode_len_mean: 250.68
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.5067999999999904
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 5
  episodes_total: 3685
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21491633184723896
          cur_lr: 5.000000000000001e-05
          entropy: 0.4998854403694471
          entropy_coeff: 0.009999999999999998
          kl: 0.0409616753263887
          policy_loss: 0.07682726503246361
          total_loss: 0.0915903079840872
          vf_explained_var: 0.5443210005760193
          vf_loss: 0.010958563269943827
    num_agent_steps_sampled: 988000
    num_agent_steps_trained: 988000
    num_steps_sampled: 988000
    num_steps_trained: 98800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,988,25425.7,988000,-2.5068,-1.99,-3.1,250.68


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 989000
  custom_metrics: {}
  date: 2021-11-05_19-36-29
  done: false
  episode_len_mean: 250.49
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.5048999999999904
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 4
  episodes_total: 3689
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3223744977708584
          cur_lr: 5.000000000000001e-05
          entropy: 1.113485985994339
          entropy_coeff: 0.009999999999999998
          kl: 0.013308458555383516
          policy_loss: -0.0006030740009413825
          total_loss: -4.42523095342848e-05
          vf_explained_var: 0.4392659068107605
          vf_loss: 0.0074033757788129154
    num_agent_steps_sampled: 989000
    num_agent_steps_trained: 989000
    num_steps_sampled: 989000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,989,25450.8,989000,-2.5049,-1.99,-3.1,250.49




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 990000
  custom_metrics: {}
  date: 2021-11-05_19-37-12
  done: false
  episode_len_mean: 250.4
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.5039999999999902
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 4
  episodes_total: 3693
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3223744977708584
          cur_lr: 5.000000000000001e-05
          entropy: 1.2726309167014227
          entropy_coeff: 0.009999999999999998
          kl: 0.01702133893790408
          policy_loss: -0.04176349805461036
          total_loss: -0.04505829343365298
          vf_explained_var: 0.8795396685600281
          vf_loss: 0.003944266895349655
    num_agent_steps_sampled: 990000
    num_agent_steps_trained: 990000
    num_steps_sampled: 990000
    num_steps_trained: 990

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,990,25493.9,990000,-2.504,-1.99,-3.1,250.4


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 991000
  custom_metrics: {}
  date: 2021-11-05_19-37-37
  done: false
  episode_len_mean: 251.28
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.51279999999999
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 4
  episodes_total: 3697
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3223744977708584
          cur_lr: 5.000000000000001e-05
          entropy: 1.4286877049340143
          entropy_coeff: 0.009999999999999998
          kl: 0.013994647125766828
          policy_loss: 0.015592342283990647
          total_loss: 0.009436728929479917
          vf_explained_var: 0.9013144373893738
          vf_loss: 0.00361974748213672
    num_agent_steps_sampled: 991000
    num_agent_steps_trained: 991000
    num_steps_sampled: 991000
    num_steps_trained: 9910

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,991,25518.3,991000,-2.5128,-1.99,-3.1,251.28


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 992000
  custom_metrics: {}
  date: 2021-11-05_19-38-01
  done: false
  episode_len_mean: 252.12
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.52119999999999
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 3
  episodes_total: 3700
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3223744977708584
          cur_lr: 5.000000000000001e-05
          entropy: 1.3600136399269105
          entropy_coeff: 0.009999999999999998
          kl: 0.011642191597403538
          policy_loss: -0.036695344911681284
          total_loss: -0.036986358132627274
          vf_explained_var: 0.4849216341972351
          vf_loss: 0.00955597963442819
    num_agent_steps_sampled: 992000
    num_agent_steps_trained: 992000
    num_steps_sampled: 992000
    num_steps_trained: 99

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,992,25542.7,992000,-2.5212,-1.99,-3.1,252.12


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 993000
  custom_metrics: {}
  date: 2021-11-05_19-38-25
  done: false
  episode_len_mean: 252.39
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.52389999999999
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 4
  episodes_total: 3704
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3223744977708584
          cur_lr: 5.000000000000001e-05
          entropy: 1.4140144824981689
          entropy_coeff: 0.009999999999999998
          kl: 0.013638347920937852
          policy_loss: 0.004448341412676705
          total_loss: -0.0014890619036224152
          vf_explained_var: 0.8640117049217224
          vf_loss: 0.0038060882749656835
    num_agent_steps_sampled: 993000
    num_agent_steps_trained: 993000
    num_steps_sampled: 993000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,993,25567,993000,-2.5239,-1.99,-3.1,252.39


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 994000
  custom_metrics: {}
  date: 2021-11-05_19-38-50
  done: false
  episode_len_mean: 253.02
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.53019999999999
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 4
  episodes_total: 3708
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3223744977708584
          cur_lr: 5.000000000000001e-05
          entropy: 1.345432636472914
          entropy_coeff: 0.009999999999999998
          kl: 0.0166190492319231
          policy_loss: -0.04097829225162665
          total_loss: -0.04392984691593382
          vf_explained_var: 0.866919219493866
          vf_loss: 0.005145212278390924
    num_agent_steps_sampled: 994000
    num_agent_steps_trained: 994000
    num_steps_sampled: 994000
    num_steps_trained: 994000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,994,25591.6,994000,-2.5302,-1.99,-3.1,253.02


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 995000
  custom_metrics: {}
  date: 2021-11-05_19-39-14
  done: false
  episode_len_mean: 253.51
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.53509999999999
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 4
  episodes_total: 3712
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3223744977708584
          cur_lr: 5.000000000000001e-05
          entropy: 1.3875146137343513
          entropy_coeff: 0.009999999999999998
          kl: 0.007625546781802603
          policy_loss: 0.05400942141811053
          total_loss: 0.04737071485983001
          vf_explained_var: 0.8448518514633179
          vf_loss: 0.004778156796884206
    num_agent_steps_sampled: 995000
    num_agent_steps_trained: 995000
    num_steps_sampled: 995000
    num_steps_trained: 99500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,995,25615.2,995000,-2.5351,-1.99,-3.1,253.51


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 996000
  custom_metrics: {}
  date: 2021-11-05_19-39-38
  done: false
  episode_len_mean: 253.46
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.53459999999999
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 4
  episodes_total: 3716
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3223744977708584
          cur_lr: 5.000000000000001e-05
          entropy: 1.4101881159676446
          entropy_coeff: 0.009999999999999998
          kl: 0.008186839689876674
          policy_loss: 0.034566585222880045
          total_loss: 0.029909739394982655
          vf_explained_var: 0.728963315486908
          vf_loss: 0.00680581024951405
    num_agent_steps_sampled: 996000
    num_agent_steps_trained: 996000
    num_steps_sampled: 996000
    num_steps_trained: 99600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,996,25639.2,996000,-2.5346,-1.99,-3.1,253.46




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 997000
  custom_metrics: {}
  date: 2021-11-05_19-40-19
  done: false
  episode_len_mean: 253.27
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.53269999999999
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 4
  episodes_total: 3720
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3223744977708584
          cur_lr: 5.000000000000001e-05
          entropy: 1.4138277875052558
          entropy_coeff: 0.009999999999999998
          kl: 0.01082778311282616
          policy_loss: -0.016226970818307665
          total_loss: -0.018140117844773665
          vf_explained_var: 0.4998551309108734
          vf_loss: 0.008734530370889438
    num_agent_steps_sampled: 997000
    num_agent_steps_trained: 997000
    num_steps_sampled: 997000
    num_steps_trained: 99

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,997,25680.1,997000,-2.5327,-1.99,-3.1,253.27


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 998000
  custom_metrics: {}
  date: 2021-11-05_19-40-44
  done: false
  episode_len_mean: 252.62
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.52619999999999
  episode_reward_min: -3.099999999999978
  episodes_this_iter: 4
  episodes_total: 3724
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3223744977708584
          cur_lr: 5.000000000000001e-05
          entropy: 1.3113279051250881
          entropy_coeff: 0.009999999999999998
          kl: 0.007398375346208594
          policy_loss: 0.022346130675739713
          total_loss: 0.01972818656100167
          vf_explained_var: 0.44367486238479614
          vf_loss: 0.008110287133604288
    num_agent_steps_sampled: 998000
    num_agent_steps_trained: 998000
    num_steps_sampled: 998000
    num_steps_trained: 998

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,998,25705.9,998000,-2.5262,-1.99,-3.1,252.62


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 999000
  custom_metrics: {}
  date: 2021-11-05_19-41-10
  done: false
  episode_len_mean: 251.16
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.5115999999999903
  episode_reward_min: -2.9399999999999813
  episodes_this_iter: 4
  episodes_total: 3728
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3223744977708584
          cur_lr: 5.000000000000001e-05
          entropy: 1.308818561500973
          entropy_coeff: 0.009999999999999998
          kl: 0.008507945813664956
          policy_loss: 0.040704948579271635
          total_loss: 0.03864420602718989
          vf_explained_var: 0.4872264862060547
          vf_loss: 0.008284695524101457
    num_agent_steps_sampled: 999000
    num_agent_steps_trained: 999000
    num_steps_sampled: 999000
    num_steps_trained: 99

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,999,25731.5,999000,-2.5116,-1.99,-2.94,251.16


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1000000
  custom_metrics: {}
  date: 2021-11-05_19-41-36
  done: false
  episode_len_mean: 250.22
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.5021999999999904
  episode_reward_min: -2.9399999999999813
  episodes_this_iter: 4
  episodes_total: 3732
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3223744977708584
          cur_lr: 5.000000000000001e-05
          entropy: 1.2137339260843065
          entropy_coeff: 0.009999999999999998
          kl: 0.007335197638213644
          policy_loss: -0.0712872926145792
          total_loss: -0.07016242345174153
          vf_explained_var: 0.3215474784374237
          vf_loss: 0.010897526962475644
    num_agent_steps_sampled: 1000000
    num_agent_steps_trained: 1000000
    num_steps_sampled: 1000000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1000,25757.9,1000000,-2.5022,-1.99,-2.94,250.22


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1001000
  custom_metrics: {}
  date: 2021-11-05_19-42-02
  done: false
  episode_len_mean: 249.33
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.4932999999999907
  episode_reward_min: -2.9399999999999813
  episodes_this_iter: 4
  episodes_total: 3736
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3223744977708584
          cur_lr: 5.000000000000001e-05
          entropy: 1.215822551647822
          entropy_coeff: 0.009999999999999998
          kl: 0.02466520936461253
          policy_loss: -0.03828061587280697
          total_loss: -0.03102712763680352
          vf_explained_var: 0.34094592928886414
          vf_loss: 0.011460279854428436
    num_agent_steps_sampled: 1001000
    num_agent_steps_trained: 1001000
    num_steps_sampled: 1001000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1001,25783.5,1001000,-2.4933,-1.99,-2.94,249.33


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1002000
  custom_metrics: {}
  date: 2021-11-05_19-42-28
  done: false
  episode_len_mean: 248.16
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.481599999999991
  episode_reward_min: -2.9399999999999813
  episodes_this_iter: 5
  episodes_total: 3741
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4835617466562877
          cur_lr: 5.000000000000001e-05
          entropy: 1.092511965168847
          entropy_coeff: 0.009999999999999998
          kl: 0.005005443277103912
          policy_loss: 0.005280133667919371
          total_loss: 0.008941442022720974
          vf_explained_var: 0.22276395559310913
          vf_loss: 0.012165986249844234
    num_agent_steps_sampled: 1002000
    num_agent_steps_trained: 1002000
    num_steps_sampled: 1002000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1002,25808.9,1002000,-2.4816,-1.99,-2.94,248.16


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1003000
  custom_metrics: {}
  date: 2021-11-05_19-42-53
  done: false
  episode_len_mean: 246.83
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.4682999999999913
  episode_reward_min: -2.7499999999999853
  episodes_this_iter: 4
  episodes_total: 3745
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4835617466562877
          cur_lr: 5.000000000000001e-05
          entropy: 1.1666065010759565
          entropy_coeff: 0.009999999999999998
          kl: 0.00401581565177038
          policy_loss: 0.021924549175633325
          total_loss: 0.022638365046845542
          vf_explained_var: 0.17931902408599854
          vf_loss: 0.010437985002580617
    num_agent_steps_sampled: 1003000
    num_agent_steps_trained: 1003000
    num_steps_sampled: 1003000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1003,25834.8,1003000,-2.4683,-1.99,-2.75,246.83


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1004000
  custom_metrics: {}
  date: 2021-11-05_19-43-21
  done: false
  episode_len_mean: 245.9
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.458999999999991
  episode_reward_min: -2.7499999999999853
  episodes_this_iter: 4
  episodes_total: 3749
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24178087332814385
          cur_lr: 5.000000000000001e-05
          entropy: 0.9501245114538405
          entropy_coeff: 0.009999999999999998
          kl: 0.006828837744118251
          policy_loss: 0.014503309544589784
          total_loss: 0.018712160653538173
          vf_explained_var: 0.09596157819032669
          vf_loss: 0.01205901668096582
    num_agent_steps_sampled: 1004000
    num_agent_steps_trained: 1004000
    num_steps_sampled: 1004000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1004,25862,1004000,-2.459,-1.99,-2.75,245.9




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1005000
  custom_metrics: {}
  date: 2021-11-05_19-44-05
  done: false
  episode_len_mean: 243.97
  episode_media: {}
  episode_reward_max: -1.9400000000000015
  episode_reward_mean: -2.439699999999992
  episode_reward_min: -2.7499999999999853
  episodes_this_iter: 5
  episodes_total: 3754
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24178087332814385
          cur_lr: 5.000000000000001e-05
          entropy: 0.9446793264812894
          entropy_coeff: 0.009999999999999998
          kl: 0.004884155810478556
          policy_loss: -0.005713883870177799
          total_loss: 0.0005435856680075327
          vf_explained_var: 0.1262584626674652
          vf_loss: 0.014523368370201852
    num_agent_steps_sampled: 1005000
    num_agent_steps_trained: 1005000
    num_steps_sampled: 1005000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1005,25906.1,1005000,-2.4397,-1.94,-2.75,243.97


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1006000
  custom_metrics: {}
  date: 2021-11-05_19-44-31
  done: false
  episode_len_mean: 243.43
  episode_media: {}
  episode_reward_max: -1.9400000000000015
  episode_reward_mean: -2.4342999999999915
  episode_reward_min: -2.7499999999999853
  episodes_this_iter: 4
  episodes_total: 3758
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12089043666407193
          cur_lr: 5.000000000000001e-05
          entropy: 0.9549955123000675
          entropy_coeff: 0.009999999999999998
          kl: 0.005318088209272176
          policy_loss: 0.021859217517905764
          total_loss: 0.026108875042862363
          vf_explained_var: 0.09898030757904053
          vf_loss: 0.013156708961145746
    num_agent_steps_sampled: 1006000
    num_agent_steps_trained: 1006000
    num_steps_sampled: 1006000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1006,25932.5,1006000,-2.4343,-1.94,-2.75,243.43


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1007000
  custom_metrics: {}
  date: 2021-11-05_19-44-58
  done: false
  episode_len_mean: 243.75
  episode_media: {}
  episode_reward_max: -1.9400000000000015
  episode_reward_mean: -2.437499999999992
  episode_reward_min: -2.7499999999999853
  episodes_this_iter: 4
  episodes_total: 3762
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12089043666407193
          cur_lr: 5.000000000000001e-05
          entropy: 1.0427771204047733
          entropy_coeff: 0.009999999999999998
          kl: 0.009818304980641557
          policy_loss: 0.006805009063747194
          total_loss: 0.010012019508414799
          vf_explained_var: 0.22474759817123413
          vf_loss: 0.012447842965937324
    num_agent_steps_sampled: 1007000
    num_agent_steps_trained: 1007000
    num_steps_sampled: 1007000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1007,25959.2,1007000,-2.4375,-1.94,-2.75,243.75


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1008000
  custom_metrics: {}
  date: 2021-11-05_19-45-25
  done: false
  episode_len_mean: 243.43
  episode_media: {}
  episode_reward_max: -1.9400000000000015
  episode_reward_mean: -2.434299999999992
  episode_reward_min: -2.7499999999999853
  episodes_this_iter: 5
  episodes_total: 3767
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12089043666407193
          cur_lr: 5.000000000000001e-05
          entropy: 0.9767689201566908
          entropy_coeff: 0.009999999999999998
          kl: 0.011962276621473696
          policy_loss: -0.008803389800919426
          total_loss: -0.004544735203186671
          vf_explained_var: 0.23493416607379913
          vf_loss: 0.01258021719339821
    num_agent_steps_sampled: 1008000
    num_agent_steps_trained: 1008000
    num_steps_sampled: 1008000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1008,25986.3,1008000,-2.4343,-1.94,-2.75,243.43


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1009000
  custom_metrics: {}
  date: 2021-11-05_19-45-51
  done: false
  episode_len_mean: 243.69
  episode_media: {}
  episode_reward_max: -1.9400000000000015
  episode_reward_mean: -2.436899999999992
  episode_reward_min: -2.7499999999999853
  episodes_this_iter: 4
  episodes_total: 3771
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12089043666407193
          cur_lr: 5.000000000000001e-05
          entropy: 0.9427383595042759
          entropy_coeff: 0.009999999999999998
          kl: 0.005663092964345474
          policy_loss: 0.026233407937818105
          total_loss: 0.02907694371210204
          vf_explained_var: 0.1611192375421524
          vf_loss: 0.011586303139726322
    num_agent_steps_sampled: 1009000
    num_agent_steps_trained: 1009000
    num_steps_sampled: 1009000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1009,26012.3,1009000,-2.4369,-1.94,-2.75,243.69


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1010000
  custom_metrics: {}
  date: 2021-11-05_19-46-18
  done: false
  episode_len_mean: 242.99
  episode_media: {}
  episode_reward_max: -1.9400000000000015
  episode_reward_mean: -2.429899999999992
  episode_reward_min: -2.7499999999999853
  episodes_this_iter: 4
  episodes_total: 3775
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12089043666407193
          cur_lr: 5.000000000000001e-05
          entropy: 0.8693810436460707
          entropy_coeff: 0.009999999999999998
          kl: 0.00526929453224396
          policy_loss: 0.011491220444440842
          total_loss: 0.015905162857638464
          vf_explained_var: 0.10740096867084503
          vf_loss: 0.012470743629253572
    num_agent_steps_sampled: 1010000
    num_agent_steps_trained: 1010000
    num_steps_sampled: 1010000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1010,26039.3,1010000,-2.4299,-1.94,-2.75,242.99




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1011000
  custom_metrics: {}
  date: 2021-11-05_19-47-02
  done: false
  episode_len_mean: 242.14
  episode_media: {}
  episode_reward_max: -1.9400000000000015
  episode_reward_mean: -2.4213999999999927
  episode_reward_min: -2.7499999999999853
  episodes_this_iter: 5
  episodes_total: 3780
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12089043666407193
          cur_lr: 5.000000000000001e-05
          entropy: 1.0893974747922686
          entropy_coeff: 0.009999999999999998
          kl: 0.010930313413970794
          policy_loss: 0.06162109524011612
          total_loss: 0.06401148074203067
          vf_explained_var: 0.22137351334095
          vf_loss: 0.011962989169276423
    num_agent_steps_sampled: 1011000
    num_agent_steps_trained: 1011000
    num_steps_sampled: 1011000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1011,26082.6,1011000,-2.4214,-1.94,-2.75,242.14


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1012000
  custom_metrics: {}
  date: 2021-11-05_19-47-29
  done: false
  episode_len_mean: 242.73
  episode_media: {}
  episode_reward_max: -1.9400000000000015
  episode_reward_mean: -2.4272999999999922
  episode_reward_min: -2.7499999999999853
  episodes_this_iter: 4
  episodes_total: 3784
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12089043666407193
          cur_lr: 5.000000000000001e-05
          entropy: 1.06590692003568
          entropy_coeff: 0.009999999999999998
          kl: 0.006401081496209334
          policy_loss: 0.02495680550734202
          total_loss: 0.029360224182407062
          vf_explained_var: 0.04109937697649002
          vf_loss: 0.014288658462464809
    num_agent_steps_sampled: 1012000
    num_agent_steps_trained: 1012000
    num_steps_sampled: 1012000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1012,26109.9,1012000,-2.4273,-1.94,-2.75,242.73


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1013000
  custom_metrics: {}
  date: 2021-11-05_19-47-54
  done: false
  episode_len_mean: 242.7
  episode_media: {}
  episode_reward_max: -1.9400000000000015
  episode_reward_mean: -2.4269999999999916
  episode_reward_min: -2.7499999999999853
  episodes_this_iter: 4
  episodes_total: 3788
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12089043666407193
          cur_lr: 5.000000000000001e-05
          entropy: 1.1203886065218183
          entropy_coeff: 0.009999999999999998
          kl: 0.012478964436740528
          policy_loss: 0.03733991289304362
          total_loss: 0.03965087164607313
          vf_explained_var: 0.02164403349161148
          vf_loss: 0.012006257464074426
    num_agent_steps_sampled: 1013000
    num_agent_steps_trained: 1013000
    num_steps_sampled: 1013000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1013,26134.8,1013000,-2.427,-1.94,-2.75,242.7


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1014000
  custom_metrics: {}
  date: 2021-11-05_19-48-20
  done: false
  episode_len_mean: 242.36
  episode_media: {}
  episode_reward_max: -1.9400000000000015
  episode_reward_mean: -2.423599999999992
  episode_reward_min: -2.7499999999999853
  episodes_this_iter: 4
  episodes_total: 3792
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12089043666407193
          cur_lr: 5.000000000000001e-05
          entropy: 1.0924899094634586
          entropy_coeff: 0.009999999999999998
          kl: 0.008807172445344789
          policy_loss: 0.007400856332646476
          total_loss: 0.012819624071319898
          vf_explained_var: 0.04093260318040848
          vf_loss: 0.015278962896102005
    num_agent_steps_sampled: 1014000
    num_agent_steps_trained: 1014000
    num_steps_sampled: 1014000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1014,26160.7,1014000,-2.4236,-1.94,-2.75,242.36


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1015000
  custom_metrics: {}
  date: 2021-11-05_19-48-47
  done: false
  episode_len_mean: 241.42
  episode_media: {}
  episode_reward_max: -1.9400000000000015
  episode_reward_mean: -2.414199999999992
  episode_reward_min: -2.7399999999999856
  episodes_this_iter: 4
  episodes_total: 3796
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12089043666407193
          cur_lr: 5.000000000000001e-05
          entropy: 0.9799795852767097
          entropy_coeff: 0.009999999999999998
          kl: 0.008359281328787757
          policy_loss: 0.011647238747941123
          total_loss: 0.01904405997031265
          vf_explained_var: 0.05517219379544258
          vf_loss: 0.016186056099832057
    num_agent_steps_sampled: 1015000
    num_agent_steps_trained: 1015000
    num_steps_sampled: 1015000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1015,26187.6,1015000,-2.4142,-1.94,-2.74,241.42


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1016000
  custom_metrics: {}
  date: 2021-11-05_19-49-13
  done: false
  episode_len_mean: 240.17
  episode_media: {}
  episode_reward_max: -1.9400000000000015
  episode_reward_mean: -2.401699999999993
  episode_reward_min: -2.7399999999999856
  episodes_this_iter: 4
  episodes_total: 3800
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12089043666407193
          cur_lr: 5.000000000000001e-05
          entropy: 0.7596356120374468
          entropy_coeff: 0.009999999999999998
          kl: 0.007066094719570041
          policy_loss: -0.06315301300750839
          total_loss: -0.05262019952966107
          vf_explained_var: 0.06074921786785126
          vf_loss: 0.017274944836066827
    num_agent_steps_sampled: 1016000
    num_agent_steps_trained: 1016000
    num_steps_sampled: 1016000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1016,26214.2,1016000,-2.4017,-1.94,-2.74,240.17


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1017000
  custom_metrics: {}
  date: 2021-11-05_19-49-40
  done: false
  episode_len_mean: 239.15
  episode_media: {}
  episode_reward_max: -1.9400000000000015
  episode_reward_mean: -2.391499999999993
  episode_reward_min: -2.7399999999999856
  episodes_this_iter: 5
  episodes_total: 3805
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12089043666407193
          cur_lr: 5.000000000000001e-05
          entropy: 0.8718327171272702
          entropy_coeff: 0.009999999999999998
          kl: 0.013193848372003852
          policy_loss: -0.006824576564961009
          total_loss: 0.006471735942694876
          vf_explained_var: 0.08637911826372147
          vf_loss: 0.020419631650050483
    num_agent_steps_sampled: 1017000
    num_agent_steps_trained: 1017000
    num_steps_sampled: 1017000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1017,26240.6,1017000,-2.3915,-1.94,-2.74,239.15


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1018000
  custom_metrics: {}
  date: 2021-11-05_19-50-05
  done: false
  episode_len_mean: 238.93
  episode_media: {}
  episode_reward_max: -1.9400000000000015
  episode_reward_mean: -2.389299999999993
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 4
  episodes_total: 3809
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12089043666407193
          cur_lr: 5.000000000000001e-05
          entropy: 0.9262308518091837
          entropy_coeff: 0.009999999999999998
          kl: 0.019709604533932022
          policy_loss: 0.02164434678852558
          total_loss: 0.029442612868216304
          vf_explained_var: 0.11025548726320267
          vf_loss: 0.014677873388346698
    num_agent_steps_sampled: 1018000
    num_agent_steps_trained: 1018000
    num_steps_sampled: 1018000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1018,26266,1018000,-2.3893,-1.94,-3.04,238.93




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1019000
  custom_metrics: {}
  date: 2021-11-05_19-50-50
  done: false
  episode_len_mean: 237.38
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.373799999999993
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 4
  episodes_total: 3813
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12089043666407193
          cur_lr: 5.000000000000001e-05
          entropy: 0.7410967628161113
          entropy_coeff: 0.009999999999999998
          kl: 0.009870200309413255
          policy_loss: 0.01675282617410024
          total_loss: 0.025910348031255935
          vf_explained_var: 0.03819131478667259
          vf_loss: 0.015375277410364813
    num_agent_steps_sampled: 1019000
    num_agent_steps_trained: 1019000
    num_steps_sampled: 1019000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1019,26311.1,1019000,-2.3738,-1.91,-3.04,237.38


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1020000
  custom_metrics: {}
  date: 2021-11-05_19-51-17
  done: false
  episode_len_mean: 236.24
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.362399999999993
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 5
  episodes_total: 3818
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12089043666407193
          cur_lr: 5.000000000000001e-05
          entropy: 0.7350322749879625
          entropy_coeff: 0.009999999999999998
          kl: 0.023682513760141473
          policy_loss: 0.019001097646024492
          total_loss: 0.029452801164653566
          vf_explained_var: 0.14786681532859802
          vf_loss: 0.01493903821748164
    num_agent_steps_sampled: 1020000
    num_agent_steps_trained: 1020000
    num_steps_sampled: 1020000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1020,26338.2,1020000,-2.3624,-1.91,-3.04,236.24


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1021000
  custom_metrics: {}
  date: 2021-11-05_19-51-43
  done: false
  episode_len_mean: 235.58
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.3557999999999937
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 4
  episodes_total: 3822
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18133565499610785
          cur_lr: 5.000000000000001e-05
          entropy: 0.8039845029513041
          entropy_coeff: 0.009999999999999998
          kl: 0.004690235584620932
          policy_loss: 0.018357937783002855
          total_loss: 0.023705049686961705
          vf_explained_var: 0.12571263313293457
          vf_loss: 0.012536453290118111
    num_agent_steps_sampled: 1021000
    num_agent_steps_trained: 1021000
    num_steps_sampled: 1021000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1021,26363.9,1021000,-2.3558,-1.91,-3.04,235.58


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1022000
  custom_metrics: {}
  date: 2021-11-05_19-52-09
  done: false
  episode_len_mean: 235.59
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.355899999999994
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 4
  episodes_total: 3826
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09066782749805392
          cur_lr: 5.000000000000001e-05
          entropy: 0.9828430069817438
          entropy_coeff: 0.009999999999999998
          kl: 0.009649762940451289
          policy_loss: 0.0015998378396034241
          total_loss: 0.00490223401122623
          vf_explained_var: 0.3018672466278076
          vf_loss: 0.012255903985351323
    num_agent_steps_sampled: 1022000
    num_agent_steps_trained: 1022000
    num_steps_sampled: 1022000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1022,26389.3,1022000,-2.3559,-1.91,-3.04,235.59


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1023000
  custom_metrics: {}
  date: 2021-11-05_19-52-34
  done: false
  episode_len_mean: 236.08
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.3607999999999936
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 4
  episodes_total: 3830
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09066782749805392
          cur_lr: 5.000000000000001e-05
          entropy: 0.854887984196345
          entropy_coeff: 0.009999999999999998
          kl: 0.009773899147313298
          policy_loss: 0.02376914703183704
          total_loss: 0.028749992532862558
          vf_explained_var: 0.2699926495552063
          vf_loss: 0.012643546730072962
    num_agent_steps_sampled: 1023000
    num_agent_steps_trained: 1023000
    num_steps_sampled: 1023000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1023,26414.9,1023000,-2.3608,-1.91,-3.04,236.08


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1024000
  custom_metrics: {}
  date: 2021-11-05_19-53-01
  done: false
  episode_len_mean: 235.78
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.3577999999999935
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 4
  episodes_total: 3834
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09066782749805392
          cur_lr: 5.000000000000001e-05
          entropy: 0.778189073006312
          entropy_coeff: 0.009999999999999998
          kl: 0.012439588096293145
          policy_loss: 0.040442267805337904
          total_loss: 0.04667003204425176
          vf_explained_var: 0.1850152611732483
          vf_loss: 0.012881788663152191
    num_agent_steps_sampled: 1024000
    num_agent_steps_trained: 1024000
    num_steps_sampled: 1024000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1024,26441.8,1024000,-2.3578,-1.91,-3.04,235.78


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1025000
  custom_metrics: {}
  date: 2021-11-05_19-53-29
  done: false
  episode_len_mean: 234.84
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.3483999999999936
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 5
  episodes_total: 3839
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09066782749805392
          cur_lr: 5.000000000000001e-05
          entropy: 0.47902525001102025
          entropy_coeff: 0.009999999999999998
          kl: 0.003750492509089851
          policy_loss: -0.02811816939049297
          total_loss: -0.019660828676488665
          vf_explained_var: 0.11272432655096054
          vf_loss: 0.01290754545480013
    num_agent_steps_sampled: 1025000
    num_agent_steps_trained: 1025000
    num_steps_sampled: 1025000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1025,26469.5,1025000,-2.3484,-1.91,-3.04,234.84




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1026000
  custom_metrics: {}
  date: 2021-11-05_19-54-13
  done: false
  episode_len_mean: 234.36
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.3435999999999937
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 4
  episodes_total: 3843
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04533391374902696
          cur_lr: 5.000000000000001e-05
          entropy: 0.8007995883623759
          entropy_coeff: 0.009999999999999998
          kl: 0.033371183143010005
          policy_loss: 0.026918769462241067
          total_loss: 0.031021351367235182
          vf_explained_var: 0.26298394799232483
          vf_loss: 0.010597732388931844
    num_agent_steps_sampled: 1026000
    num_agent_steps_trained: 1026000
    num_steps_sampled: 1026000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1026,26514,1026000,-2.3436,-1.91,-3.04,234.36


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1027000
  custom_metrics: {}
  date: 2021-11-05_19-54-40
  done: false
  episode_len_mean: 234.08
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.340799999999994
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 4
  episodes_total: 3847
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06800087062354045
          cur_lr: 5.000000000000001e-05
          entropy: 0.4503292782439126
          entropy_coeff: 0.009999999999999998
          kl: 0.004371197008335405
          policy_loss: 0.0023458350863721634
          total_loss: 0.007698539975616667
          vf_explained_var: 0.17444895207881927
          vf_loss: 0.009558752748287387
    num_agent_steps_sampled: 1027000
    num_agent_steps_trained: 1027000
    num_steps_sampled: 1027000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1027,26540.7,1027000,-2.3408,-1.91,-3.04,234.08


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1028000
  custom_metrics: {}
  date: 2021-11-05_19-55-06
  done: false
  episode_len_mean: 234.62
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.346199999999994
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 5
  episodes_total: 3852
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03400043531177022
          cur_lr: 5.000000000000001e-05
          entropy: 0.4721799396806293
          entropy_coeff: 0.009999999999999998
          kl: 0.01041834759726391
          policy_loss: -0.008575532833735149
          total_loss: -0.0003937926557328966
          vf_explained_var: 0.20482003688812256
          vf_loss: 0.012549312278214429
    num_agent_steps_sampled: 1028000
    num_agent_steps_trained: 1028000
    num_steps_sampled: 1028000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1028,26567,1028000,-2.3462,-1.91,-3.04,234.62


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1029000
  custom_metrics: {}
  date: 2021-11-05_19-55-33
  done: false
  episode_len_mean: 234.68
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.3467999999999933
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 4
  episodes_total: 3856
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03400043531177022
          cur_lr: 5.000000000000001e-05
          entropy: 0.41201276183128355
          entropy_coeff: 0.009999999999999998
          kl: 0.003876607010565536
          policy_loss: 0.06422395838631524
          total_loss: 0.06944067407813337
          vf_explained_var: 0.1550973355770111
          vf_loss: 0.009205036646583014
    num_agent_steps_sampled: 1029000
    num_agent_steps_trained: 1029000
    num_steps_sampled: 1029000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1029,26593.3,1029000,-2.3468,-1.91,-3.04,234.68


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1030000
  custom_metrics: {}
  date: 2021-11-05_19-55-59
  done: false
  episode_len_mean: 234.77
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.3476999999999935
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 4
  episodes_total: 3860
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01700021765588511
          cur_lr: 5.000000000000001e-05
          entropy: 0.6250494185421201
          entropy_coeff: 0.009999999999999998
          kl: 0.007440731126663991
          policy_loss: 0.005105405963129467
          total_loss: 0.01308877459830708
          vf_explained_var: 0.01808783784508705
          vf_loss: 0.014107369672920969
    num_agent_steps_sampled: 1030000
    num_agent_steps_trained: 1030000
    num_steps_sampled: 1030000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1030,26620.1,1030000,-2.3477,-1.91,-3.04,234.77


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1031000
  custom_metrics: {}
  date: 2021-11-05_19-56-27
  done: false
  episode_len_mean: 234.78
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.3477999999999937
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 4
  episodes_total: 3864
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01700021765588511
          cur_lr: 5.000000000000001e-05
          entropy: 0.39666515092055005
          entropy_coeff: 0.009999999999999998
          kl: 0.007209138804626668
          policy_loss: -0.0669221336642901
          total_loss: -0.057948253883255856
          vf_explained_var: 0.12009311467409134
          vf_loss: 0.012817973488320906
    num_agent_steps_sampled: 1031000
    num_agent_steps_trained: 1031000
    num_steps_sampled: 1031000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1031,26647.2,1031000,-2.3478,-1.91,-3.04,234.78


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1032000
  custom_metrics: {}
  date: 2021-11-05_19-56-54
  done: false
  episode_len_mean: 234.74
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.3473999999999937
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 5
  episodes_total: 3869
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01700021765588511
          cur_lr: 5.000000000000001e-05
          entropy: 0.49678757819864483
          entropy_coeff: 0.009999999999999998
          kl: 0.005245114402410738
          policy_loss: -0.0003070044434732861
          total_loss: 0.00950440987944603
          vf_explained_var: 0.0942504033446312
          vf_loss: 0.014690119866281747
    num_agent_steps_sampled: 1032000
    num_agent_steps_trained: 1032000
    num_steps_sampled: 1032000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1032,26674.4,1032000,-2.3474,-1.91,-3.04,234.74




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1033000
  custom_metrics: {}
  date: 2021-11-05_19-57-38
  done: false
  episode_len_mean: 234.45
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.344499999999994
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 4
  episodes_total: 3873
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01700021765588511
          cur_lr: 5.000000000000001e-05
          entropy: 0.49004563126299117
          entropy_coeff: 0.009999999999999998
          kl: 0.024259670104544915
          policy_loss: 0.021172217859162224
          total_loss: 0.0288880522052447
          vf_explained_var: 0.07795435190200806
          vf_loss: 0.01220386892867585
    num_agent_steps_sampled: 1033000
    num_agent_steps_trained: 1033000
    num_steps_sampled: 1033000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1033,26718.1,1033000,-2.3445,-1.91,-3.04,234.45


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1034000
  custom_metrics: {}
  date: 2021-11-05_19-58-05
  done: false
  episode_len_mean: 234.71
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.347099999999994
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 4
  episodes_total: 3877
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.025500326483827666
          cur_lr: 5.000000000000001e-05
          entropy: 0.46573526759942374
          entropy_coeff: 0.009999999999999998
          kl: 0.004632018826408234
          policy_loss: -0.07187257442209456
          total_loss: -0.061242338104380505
          vf_explained_var: 0.07887712121009827
          vf_loss: 0.015169470798638132
    num_agent_steps_sampled: 1034000
    num_agent_steps_trained: 1034000
    num_steps_sampled: 1034000
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1034,26745.2,1034000,-2.3471,-1.91,-3.04,234.71


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1035000
  custom_metrics: {}
  date: 2021-11-05_19-58-31
  done: false
  episode_len_mean: 234.86
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.3485999999999936
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 5
  episodes_total: 3882
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012750163241913833
          cur_lr: 5.000000000000001e-05
          entropy: 0.5403282208575143
          entropy_coeff: 0.009999999999999998
          kl: 0.005125596318318218
          policy_loss: -0.0012740472952524821
          total_loss: 0.010752174175447889
          vf_explained_var: 0.06212591007351875
          vf_loss: 0.017364153762658437
    num_agent_steps_sampled: 1035000
    num_agent_steps_trained: 1035000
    num_steps_sampled: 1035000
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1035,26771.1,1035000,-2.3486,-1.91,-3.04,234.86


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1036000
  custom_metrics: {}
  date: 2021-11-05_19-58-55
  done: false
  episode_len_mean: 235.16
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.351599999999994
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 4
  episodes_total: 3886
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012750163241913833
          cur_lr: 5.000000000000001e-05
          entropy: 0.6092183752192392
          entropy_coeff: 0.009999999999999998
          kl: 0.052757101299556394
          policy_loss: 0.006783098313543532
          total_loss: 0.014159211640556653
          vf_explained_var: 0.33863943815231323
          vf_loss: 0.012795635509408182
    num_agent_steps_sampled: 1036000
    num_agent_steps_trained: 1036000
    num_steps_sampled: 1036000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1036,26795.9,1036000,-2.3516,-1.91,-3.04,235.16


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1037000
  custom_metrics: {}
  date: 2021-11-05_19-59-22
  done: false
  episode_len_mean: 234.57
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.3456999999999937
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 4
  episodes_total: 3890
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.019125244862870757
          cur_lr: 5.000000000000001e-05
          entropy: 0.6783102326922946
          entropy_coeff: 0.009999999999999998
          kl: 0.009455128044345902
          policy_loss: 0.028152277941505113
          total_loss: 0.03398606350852384
          vf_explained_var: 0.07967085391283035
          vf_loss: 0.012436056561354134
    num_agent_steps_sampled: 1037000
    num_agent_steps_trained: 1037000
    num_steps_sampled: 1037000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1037,26822.1,1037000,-2.3457,-1.91,-3.04,234.57


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1038000
  custom_metrics: {}
  date: 2021-11-05_19-59-47
  done: false
  episode_len_mean: 234.97
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.3496999999999937
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 4
  episodes_total: 3894
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.019125244862870757
          cur_lr: 5.000000000000001e-05
          entropy: 0.6882211609019173
          entropy_coeff: 0.009999999999999998
          kl: 0.03318618425224097
          policy_loss: 0.03076310066713227
          total_loss: 0.03766665061314901
          vf_explained_var: 0.03304202854633331
          vf_loss: 0.013151066067318121
    num_agent_steps_sampled: 1038000
    num_agent_steps_trained: 1038000
    num_steps_sampled: 1038000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1038,26847.6,1038000,-2.3497,-1.91,-3.04,234.97


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1039000
  custom_metrics: {}
  date: 2021-11-05_20-00-14
  done: false
  episode_len_mean: 235.39
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.353899999999993
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 4
  episodes_total: 3898
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02868786729430613
          cur_lr: 5.000000000000001e-05
          entropy: 0.5685752514335844
          entropy_coeff: 0.009999999999999998
          kl: 0.020174323057055212
          policy_loss: 0.015019903497563467
          total_loss: 0.022480291707648172
          vf_explained_var: 0.1252095252275467
          vf_loss: 0.012567383102658721
    num_agent_steps_sampled: 1039000
    num_agent_steps_trained: 1039000
    num_steps_sampled: 1039000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1039,26874.1,1039000,-2.3539,-1.91,-3.04,235.39




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1040000
  custom_metrics: {}
  date: 2021-11-05_20-00-57
  done: false
  episode_len_mean: 234.71
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.3470999999999935
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 5
  episodes_total: 3903
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04303180094145919
          cur_lr: 5.000000000000001e-05
          entropy: 0.5651481515831418
          entropy_coeff: 0.009999999999999998
          kl: 0.0052750093401501864
          policy_loss: -0.005819133669137955
          total_loss: 0.003922140722473462
          vf_explained_var: 0.13189072906970978
          vf_loss: 0.01516576638031337
    num_agent_steps_sampled: 1040000
    num_agent_steps_trained: 1040000
    num_steps_sampled: 1040000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1040,26917.5,1040000,-2.3471,-1.91,-3.04,234.71


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1041000
  custom_metrics: {}
  date: 2021-11-05_20-01-25
  done: false
  episode_len_mean: 234.62
  episode_media: {}
  episode_reward_max: -1.9100000000000015
  episode_reward_mean: -2.346199999999994
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 4
  episodes_total: 3907
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04303180094145919
          cur_lr: 5.000000000000001e-05
          entropy: 0.312723794248369
          entropy_coeff: 0.009999999999999998
          kl: 0.007406730694792202
          policy_loss: 0.04305943846702576
          total_loss: 0.04867094912462765
          vf_explained_var: 0.08623841404914856
          vf_loss: 0.008420022170887225
    num_agent_steps_sampled: 1041000
    num_agent_steps_trained: 1041000
    num_steps_sampled: 1041000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1041,26944.9,1041000,-2.3462,-1.91,-3.04,234.62


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1042000
  custom_metrics: {}
  date: 2021-11-05_20-01-52
  done: false
  episode_len_mean: 233.67
  episode_media: {}
  episode_reward_max: -1.9200000000000015
  episode_reward_mean: -2.336699999999994
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 5
  episodes_total: 3912
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04303180094145919
          cur_lr: 5.000000000000001e-05
          entropy: 0.2416627953449885
          entropy_coeff: 0.009999999999999998
          kl: 0.0037711491061654277
          policy_loss: 0.007908675240145789
          total_loss: 0.019445236689514585
          vf_explained_var: 0.04174290597438812
          vf_loss: 0.013790910163273414
    num_agent_steps_sampled: 1042000
    num_agent_steps_trained: 1042000
    num_steps_sampled: 1042000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1042,26972.6,1042000,-2.3367,-1.92,-2.95,233.67


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1043000
  custom_metrics: {}
  date: 2021-11-05_20-02-20
  done: false
  episode_len_mean: 233.64
  episode_media: {}
  episode_reward_max: -1.9200000000000015
  episode_reward_mean: -2.336399999999994
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 4
  episodes_total: 3916
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.021515900470729595
          cur_lr: 5.000000000000001e-05
          entropy: 0.42556782364845275
          entropy_coeff: 0.009999999999999998
          kl: 0.003979244625115304
          policy_loss: 0.035446311036745705
          total_loss: 0.041872687886158624
          vf_explained_var: 0.12056528776884079
          vf_loss: 0.0105964421812031
    num_agent_steps_sampled: 1043000
    num_agent_steps_trained: 1043000
    num_steps_sampled: 1043000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1043,26999.8,1043000,-2.3364,-1.92,-2.95,233.64


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1044000
  custom_metrics: {}
  date: 2021-11-05_20-02-47
  done: false
  episode_len_mean: 233.19
  episode_media: {}
  episode_reward_max: -1.9200000000000015
  episode_reward_mean: -2.3318999999999943
  episode_reward_min: -2.949999999999981
  episodes_this_iter: 4
  episodes_total: 3920
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010757950235364798
          cur_lr: 5.000000000000001e-05
          entropy: 0.42560332516829175
          entropy_coeff: 0.009999999999999998
          kl: 0.003552336120007106
          policy_loss: -0.015899474918842315
          total_loss: -0.008005147758457396
          vf_explained_var: 0.07987958192825317
          vf_loss: 0.012112143221828672
    num_agent_steps_sampled: 1044000
    num_agent_steps_trained: 1044000
    num_steps_sampled: 1044000
    num_steps_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1044,27027.3,1044000,-2.3319,-1.92,-2.95,233.19


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1045000
  custom_metrics: {}
  date: 2021-11-05_20-03-14
  done: false
  episode_len_mean: 231.95
  episode_media: {}
  episode_reward_max: -1.9200000000000015
  episode_reward_mean: -2.3194999999999943
  episode_reward_min: -2.7499999999999853
  episodes_this_iter: 5
  episodes_total: 3925
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005378975117682399
          cur_lr: 5.000000000000001e-05
          entropy: 0.3233543824818399
          entropy_coeff: 0.009999999999999998
          kl: 0.012211262251313946
          policy_loss: 0.0003914652599228753
          total_loss: 0.012517837931712468
          vf_explained_var: 0.03679308667778969
          vf_loss: 0.015294232643726799
    num_agent_steps_sampled: 1045000
    num_agent_steps_trained: 1045000
    num_steps_sampled: 1045000
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1045,27054.6,1045000,-2.3195,-1.92,-2.75,231.95


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1046000
  custom_metrics: {}
  date: 2021-11-05_20-03-43
  done: false
  episode_len_mean: 231.18
  episode_media: {}
  episode_reward_max: -1.9200000000000015
  episode_reward_mean: -2.3117999999999945
  episode_reward_min: -2.7499999999999853
  episodes_this_iter: 4
  episodes_total: 3929
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005378975117682399
          cur_lr: 5.000000000000001e-05
          entropy: 0.2723623969488674
          entropy_coeff: 0.009999999999999998
          kl: 0.002020801840904893
          policy_loss: -0.04846031980382071
          total_loss: -0.03926139589813021
          vf_explained_var: 0.1160779744386673
          vf_loss: 0.011911679773281019
    num_agent_steps_sampled: 1046000
    num_agent_steps_trained: 1046000
    num_steps_sampled: 1046000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1046,27082.9,1046000,-2.3118,-1.92,-2.75,231.18




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1047000
  custom_metrics: {}
  date: 2021-11-05_20-04-27
  done: false
  episode_len_mean: 230.43
  episode_media: {}
  episode_reward_max: -1.9200000000000015
  episode_reward_mean: -2.3042999999999947
  episode_reward_min: -2.7499999999999853
  episodes_this_iter: 5
  episodes_total: 3934
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0026894875588411994
          cur_lr: 5.000000000000001e-05
          entropy: 0.35811317579613794
          entropy_coeff: 0.009999999999999998
          kl: 0.008376221350748224
          policy_loss: 0.00268431082367897
          total_loss: 0.012570722318357892
          vf_explained_var: 0.12434292584657669
          vf_loss: 0.01344501462040676
    num_agent_steps_sampled: 1047000
    num_agent_steps_trained: 1047000
    num_steps_sampled: 1047000
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1047,27127.4,1047000,-2.3043,-1.92,-2.75,230.43


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1048000
  custom_metrics: {}
  date: 2021-11-05_20-04-55
  done: false
  episode_len_mean: 230.54
  episode_media: {}
  episode_reward_max: -1.9200000000000015
  episode_reward_mean: -2.3053999999999943
  episode_reward_min: -2.7499999999999853
  episodes_this_iter: 4
  episodes_total: 3938
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0026894875588411994
          cur_lr: 5.000000000000001e-05
          entropy: 0.23418130874633789
          entropy_coeff: 0.009999999999999998
          kl: 0.003924443172627908
          policy_loss: -0.02610825320912732
          total_loss: -0.015018550306558609
          vf_explained_var: 0.05889975279569626
          vf_loss: 0.013420959293014473
    num_agent_steps_sampled: 1048000
    num_agent_steps_trained: 1048000
    num_steps_sampled: 1048000
    num_steps

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1048,27154.7,1048000,-2.3054,-1.92,-2.75,230.54


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1049000
  custom_metrics: {}
  date: 2021-11-05_20-05-19
  done: false
  episode_len_mean: 230.97
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.3096999999999945
  episode_reward_min: -2.7499999999999853
  episodes_this_iter: 4
  episodes_total: 3942
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0013447437794205997
          cur_lr: 5.000000000000001e-05
          entropy: 0.5069808531138632
          entropy_coeff: 0.009999999999999998
          kl: 0.10402302951846738
          policy_loss: -0.10281956684258249
          total_loss: -0.09248555526137352
          vf_explained_var: 0.23516646027565002
          vf_loss: 0.01526393529234661
    num_agent_steps_sampled: 1049000
    num_agent_steps_trained: 1049000
    num_steps_sampled: 1049000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1049,27179.5,1049000,-2.3097,-1.97,-2.75,230.97


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1050000
  custom_metrics: {}
  date: 2021-11-05_20-05-47
  done: false
  episode_len_mean: 231.59
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.3158999999999947
  episode_reward_min: -3.2699999999999743
  episodes_this_iter: 5
  episodes_total: 3947
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0020171156691309
          cur_lr: 5.000000000000001e-05
          entropy: 0.33405345098839867
          entropy_coeff: 0.009999999999999998
          kl: 0.005354099892763189
          policy_loss: -0.03168703094124794
          total_loss: -0.01996749598118994
          vf_explained_var: 0.2167041003704071
          vf_loss: 0.015049271683933007
    num_agent_steps_sampled: 1050000
    num_agent_steps_trained: 1050000
    num_steps_sampled: 1050000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1050,27206.6,1050000,-2.3159,-1.97,-3.27,231.59


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1051000
  custom_metrics: {}
  date: 2021-11-05_20-06-15
  done: false
  episode_len_mean: 231.19
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.3118999999999947
  episode_reward_min: -3.2699999999999743
  episodes_this_iter: 4
  episodes_total: 3951
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0020171156691309
          cur_lr: 5.000000000000001e-05
          entropy: 0.22850930558310614
          entropy_coeff: 0.009999999999999998
          kl: 0.004515207558507781
          policy_loss: 0.02412121461497413
          total_loss: 0.03324750786026319
          vf_explained_var: 0.13208602368831635
          vf_loss: 0.011402279707706638
    num_agent_steps_sampled: 1051000
    num_agent_steps_trained: 1051000
    num_steps_sampled: 1051000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1051,27234.7,1051000,-2.3119,-1.97,-3.27,231.19


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1052000
  custom_metrics: {}
  date: 2021-11-05_20-06-42
  done: false
  episode_len_mean: 230.53
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.3052999999999946
  episode_reward_min: -3.2699999999999743
  episodes_this_iter: 5
  episodes_total: 3956
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00100855783456545
          cur_lr: 5.000000000000001e-05
          entropy: 0.35544585751162633
          entropy_coeff: 0.009999999999999998
          kl: 0.007983591993096518
          policy_loss: -0.011960448573033015
          total_loss: 0.000636562125550376
          vf_explained_var: 0.22937463223934174
          vf_loss: 0.016143418786426385
    num_agent_steps_sampled: 1052000
    num_agent_steps_trained: 1052000
    num_steps_sampled: 1052000
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1052,27262.3,1052000,-2.3053,-1.97,-3.27,230.53




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1053000
  custom_metrics: {}
  date: 2021-11-05_20-07-28
  done: false
  episode_len_mean: 229.73
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.297299999999995
  episode_reward_min: -3.2699999999999743
  episodes_this_iter: 4
  episodes_total: 3960
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00100855783456545
          cur_lr: 5.000000000000001e-05
          entropy: 0.30627258486217923
          entropy_coeff: 0.009999999999999998
          kl: 0.07273549675213234
          policy_loss: 0.06287153801984258
          total_loss: 0.07007830407884386
          vf_explained_var: 0.26469770073890686
          vf_loss: 0.010196130859872534
    num_agent_steps_sampled: 1053000
    num_agent_steps_trained: 1053000
    num_steps_sampled: 1053000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1053,27307.5,1053000,-2.2973,-1.96,-3.27,229.73


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1054000
  custom_metrics: {}
  date: 2021-11-05_20-07-57
  done: false
  episode_len_mean: 229.5
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.294999999999995
  episode_reward_min: -3.2699999999999743
  episodes_this_iter: 5
  episodes_total: 3965
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.001512836751848175
          cur_lr: 5.000000000000001e-05
          entropy: 0.4460465957721074
          entropy_coeff: 0.009999999999999998
          kl: 0.04384860619631405
          policy_loss: -0.043306736316945815
          total_loss: -0.03109670446978675
          vf_explained_var: 0.21060791611671448
          vf_loss: 0.01660416027944949
    num_agent_steps_sampled: 1054000
    num_agent_steps_trained: 1054000
    num_steps_sampled: 1054000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1054,27336.8,1054000,-2.295,-1.96,-3.27,229.5


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1055000
  custom_metrics: {}
  date: 2021-11-05_20-08-24
  done: false
  episode_len_mean: 229.7
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.296999999999995
  episode_reward_min: -3.2699999999999743
  episodes_this_iter: 4
  episodes_total: 3969
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0022692551277722624
          cur_lr: 5.000000000000001e-05
          entropy: 0.46003943830728533
          entropy_coeff: 0.009999999999999998
          kl: 0.07542211375124926
          policy_loss: 0.021677475422620773
          total_loss: 0.024836525486575233
          vf_explained_var: 0.4386977255344391
          vf_loss: 0.00758828924347957
    num_agent_steps_sampled: 1055000
    num_agent_steps_trained: 1055000
    num_steps_sampled: 1055000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1055,27363.8,1055000,-2.297,-1.96,-3.27,229.7


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1056000
  custom_metrics: {}
  date: 2021-11-05_20-08-51
  done: false
  episode_len_mean: 229.75
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.297499999999995
  episode_reward_min: -3.2699999999999743
  episodes_this_iter: 4
  episodes_total: 3973
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.003403882691658393
          cur_lr: 5.000000000000001e-05
          entropy: 0.5472424334949917
          entropy_coeff: 0.009999999999999998
          kl: 0.05035788316842371
          policy_loss: -0.027731783108578788
          total_loss: -0.023699083427588145
          vf_explained_var: 0.4021398723125458
          vf_loss: 0.009333709974048867
    num_agent_steps_sampled: 1056000
    num_agent_steps_trained: 1056000
    num_steps_sampled: 1056000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1056,27390.6,1056000,-2.2975,-1.96,-3.27,229.75


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1057000
  custom_metrics: {}
  date: 2021-11-05_20-09-18
  done: false
  episode_len_mean: 229.22
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.2921999999999954
  episode_reward_min: -3.2699999999999743
  episodes_this_iter: 5
  episodes_total: 3978
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005105824037487591
          cur_lr: 5.000000000000001e-05
          entropy: 0.3976036912865109
          entropy_coeff: 0.009999999999999998
          kl: 0.028768995720445723
          policy_loss: 0.02093379373351733
          total_loss: 0.03205241478151745
          vf_explained_var: 0.17454534769058228
          vf_loss: 0.014947766717523337
    num_agent_steps_sampled: 1057000
    num_agent_steps_trained: 1057000
    num_steps_sampled: 1057000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1057,27417.6,1057000,-2.2922,-1.96,-3.27,229.22


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1058000
  custom_metrics: {}
  date: 2021-11-05_20-09-40
  done: false
  episode_len_mean: 231.43
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.314299999999994
  episode_reward_min: -4.749999999999943
  episodes_this_iter: 3
  episodes_total: 3981
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0076587360562313835
          cur_lr: 5.000000000000001e-05
          entropy: 0.8734228160646227
          entropy_coeff: 0.009999999999999998
          kl: 0.05591986131203544
          policy_loss: 0.0999345792664422
          total_loss: 0.09932629085249371
          vf_explained_var: 0.190103679895401
          vf_loss: 0.007697663699380226
    num_agent_steps_sampled: 1058000
    num_agent_steps_trained: 1058000
    num_steps_sampled: 1058000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1058,27439.3,1058000,-2.3143,-1.96,-4.75,231.43


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1059000
  custom_metrics: {}
  date: 2021-11-05_20-09-56
  done: false
  episode_len_mean: 234.08
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.3407999999999936
  episode_reward_min: -5.079999999999936
  episodes_this_iter: 2
  episodes_total: 3983
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.011488104084347079
          cur_lr: 5.000000000000001e-05
          entropy: 1.2768692228529188
          entropy_coeff: 0.009999999999999998
          kl: 0.01903814484937906
          policy_loss: -0.10462134877840677
          total_loss: -0.10905726535452737
          vf_explained_var: -0.2935294508934021
          vf_loss: 0.00811406514484487
    num_agent_steps_sampled: 1059000
    num_agent_steps_trained: 1059000
    num_steps_sampled: 1059000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1059,27455.8,1059000,-2.3408,-1.96,-5.08,234.08


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1060000
  custom_metrics: {}
  date: 2021-11-05_20-10-17
  done: false
  episode_len_mean: 237.21
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.3720999999999934
  episode_reward_min: -5.079999999999936
  episodes_this_iter: 4
  episodes_total: 3987
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.011488104084347079
          cur_lr: 5.000000000000001e-05
          entropy: 1.0593904773394267
          entropy_coeff: 0.009999999999999998
          kl: 0.03524699693921549
          policy_loss: -0.000590553010503451
          total_loss: 0.0009022795905669531
          vf_explained_var: 0.05047927796840668
          vf_loss: 0.011681816269022724
    num_agent_steps_sampled: 1060000
    num_agent_steps_trained: 1060000
    num_steps_sampled: 1060000
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1060,27476.8,1060000,-2.3721,-1.96,-5.08,237.21




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1061000
  custom_metrics: {}
  date: 2021-11-05_20-10-57
  done: false
  episode_len_mean: 238.97
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.3896999999999924
  episode_reward_min: -5.079999999999936
  episodes_this_iter: 3
  episodes_total: 3990
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.017232156126520615
          cur_lr: 5.000000000000001e-05
          entropy: 1.0377306875255372
          entropy_coeff: 0.009999999999999998
          kl: 0.05537824047218839
          policy_loss: 0.020167470475037894
          total_loss: 0.018300298187467787
          vf_explained_var: 0.054736778140068054
          vf_loss: 0.007555844701386781
    num_agent_steps_sampled: 1061000
    num_agent_steps_trained: 1061000
    num_steps_sampled: 1061000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1061,27517,1061000,-2.3897,-1.96,-5.08,238.97


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1062000
  custom_metrics: {}
  date: 2021-11-05_20-11-13
  done: false
  episode_len_mean: 243.26
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.432599999999992
  episode_reward_min: -5.079999999999936
  episodes_this_iter: 3
  episodes_total: 3993
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.025848234189780932
          cur_lr: 5.000000000000001e-05
          entropy: 1.4542206671502855
          entropy_coeff: 0.009999999999999998
          kl: 0.0235176176250709
          policy_loss: 0.04487096443772316
          total_loss: 0.03830340728163719
          vf_explained_var: 0.22198954224586487
          vf_loss: 0.007366763086368641
    num_agent_steps_sampled: 1062000
    num_agent_steps_trained: 1062000
    num_steps_sampled: 1062000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1062,27532.8,1062000,-2.4326,-1.96,-5.08,243.26


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1063000
  custom_metrics: {}
  date: 2021-11-05_20-11-28
  done: false
  episode_len_mean: 248.07
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.4806999999999912
  episode_reward_min: -5.079999999999936
  episodes_this_iter: 2
  episodes_total: 3995
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03877235128467139
          cur_lr: 5.000000000000001e-05
          entropy: 1.6740488449732462
          entropy_coeff: 0.009999999999999998
          kl: 0.022112263340050634
          policy_loss: 0.09168868164221446
          total_loss: 0.08196069929334852
          vf_explained_var: -0.6271244287490845
          vf_loss: 0.0061551561731094905
    num_agent_steps_sampled: 1063000
    num_agent_steps_trained: 1063000
    num_steps_sampled: 1063000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1063,27547.7,1063000,-2.4807,-1.96,-5.08,248.07


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1064000
  custom_metrics: {}
  date: 2021-11-05_20-11-44
  done: false
  episode_len_mean: 252.58
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.5257999999999905
  episode_reward_min: -5.079999999999936
  episodes_this_iter: 2
  episodes_total: 3997
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05815852692700709
          cur_lr: 5.000000000000001e-05
          entropy: 1.6854078372319539
          entropy_coeff: 0.009999999999999998
          kl: 0.02011887991866307
          policy_loss: 0.14947342053055762
          total_loss: 0.13646736360258527
          vf_explained_var: -0.5082598328590393
          vf_loss: 0.002677936150171768
    num_agent_steps_sampled: 1064000
    num_agent_steps_trained: 1064000
    num_steps_sampled: 1064000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1064,27563.2,1064000,-2.5258,-1.96,-5.08,252.58


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1065000
  custom_metrics: {}
  date: 2021-11-05_20-12-00
  done: false
  episode_len_mean: 256.4
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.5639999999999894
  episode_reward_min: -5.079999999999936
  episodes_this_iter: 2
  episodes_total: 3999
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0872377903905106
          cur_lr: 5.000000000000001e-05
          entropy: 1.7068513340420193
          entropy_coeff: 0.009999999999999998
          kl: 0.04466468891932761
          policy_loss: -0.08527269098493788
          total_loss: -0.08556479132837719
          vf_explained_var: -0.292810320854187
          vf_loss: 0.012879965510607386
    num_agent_steps_sampled: 1065000
    num_agent_steps_trained: 1065000
    num_steps_sampled: 1065000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1065,27579.8,1065000,-2.564,-1.96,-5.08,256.4


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1066000
  custom_metrics: {}
  date: 2021-11-05_20-12-16
  done: false
  episode_len_mean: 260.35
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.603499999999989
  episode_reward_min: -5.079999999999936
  episodes_this_iter: 2
  episodes_total: 4001
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.130856685585766
          cur_lr: 5.000000000000001e-05
          entropy: 1.7784087737401326
          entropy_coeff: 0.009999999999999998
          kl: 0.02429627186435255
          policy_loss: -0.08270865562889311
          total_loss: -0.08457324388954375
          vf_explained_var: -0.24956512451171875
          vf_loss: 0.012740168671330643
    num_agent_steps_sampled: 1066000
    num_agent_steps_trained: 1066000
    num_steps_sampled: 1066000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1066,27595.7,1066000,-2.6035,-1.96,-5.08,260.35


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1067000
  custom_metrics: {}
  date: 2021-11-05_20-12-35
  done: false
  episode_len_mean: 264.69
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.646899999999988
  episode_reward_min: -5.079999999999936
  episodes_this_iter: 3
  episodes_total: 4004
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.196285028378649
          cur_lr: 5.000000000000001e-05
          entropy: 1.301200184557173
          entropy_coeff: 0.009999999999999998
          kl: 0.011774376538430432
          policy_loss: -0.11111650764942169
          total_loss: -0.10783717309435209
          vf_explained_var: 0.17142228782176971
          vf_loss: 0.013980199872619576
    num_agent_steps_sampled: 1067000
    num_agent_steps_trained: 1067000
    num_steps_sampled: 1067000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1067,27614.2,1067000,-2.6469,-1.96,-5.08,264.69


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1068000
  custom_metrics: {}
  date: 2021-11-05_20-12-55
  done: false
  episode_len_mean: 269.24
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.692399999999987
  episode_reward_min: -5.079999999999936
  episodes_this_iter: 3
  episodes_total: 4007
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.196285028378649
          cur_lr: 5.000000000000001e-05
          entropy: 1.3568671875529819
          entropy_coeff: 0.009999999999999998
          kl: 0.009995766449821275
          policy_loss: -0.05996990584664875
          total_loss: -0.061198176443576814
          vf_explained_var: 0.33612239360809326
          vf_loss: 0.010378380545363244
    num_agent_steps_sampled: 1068000
    num_agent_steps_trained: 1068000
    num_steps_sampled: 1068000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1068,27634.3,1068000,-2.6924,-1.96,-5.08,269.24


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1069000
  custom_metrics: {}
  date: 2021-11-05_20-13-16
  done: false
  episode_len_mean: 269.79
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.6978999999999864
  episode_reward_min: -5.079999999999936
  episodes_this_iter: 3
  episodes_total: 4010
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.196285028378649
          cur_lr: 5.000000000000001e-05
          entropy: 0.9335273861885071
          entropy_coeff: 0.009999999999999998
          kl: 0.009756842447502202
          policy_loss: -0.10321843491660224
          total_loss: -0.0984348005718655
          vf_explained_var: 0.2578074038028717
          vf_loss: 0.01220378629449341
    num_agent_steps_sampled: 1069000
    num_agent_steps_trained: 1069000
    num_steps_sampled: 1069000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1069,27655.3,1069000,-2.6979,-1.96,-5.08,269.79


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1070000
  custom_metrics: {}
  date: 2021-11-05_20-13-33
  done: false
  episode_len_mean: 275.56
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.755599999999985
  episode_reward_min: -5.129999999999935
  episodes_this_iter: 3
  episodes_total: 4013
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.196285028378649
          cur_lr: 5.000000000000001e-05
          entropy: 1.3176543785466088
          entropy_coeff: 0.009999999999999998
          kl: 0.012668855686372677
          policy_loss: -0.09140140861272812
          total_loss: -0.08692734142144522
          vf_explained_var: -0.3486476242542267
          vf_loss: 0.015163905066179319
    num_agent_steps_sampled: 1070000
    num_agent_steps_trained: 1070000
    num_steps_sampled: 1070000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1070,27672.7,1070000,-2.7556,-1.96,-5.13,275.56


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1071000
  custom_metrics: {}
  date: 2021-11-05_20-13-52
  done: false
  episode_len_mean: 280.32
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.803199999999984
  episode_reward_min: -5.129999999999935
  episodes_this_iter: 3
  episodes_total: 4016
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.196285028378649
          cur_lr: 5.000000000000001e-05
          entropy: 1.3743313365512424
          entropy_coeff: 0.009999999999999998
          kl: 0.009270278586409119
          policy_loss: 0.062039582928021746
          total_loss: 0.060072720133596
          vf_explained_var: -0.1837620884180069
          vf_loss: 0.009956832999078971
    num_agent_steps_sampled: 1071000
    num_agent_steps_trained: 1071000
    num_steps_sampled: 1071000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1071,27691,1071000,-2.8032,-1.96,-5.13,280.32


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1072000
  custom_metrics: {}
  date: 2021-11-05_20-14-07
  done: false
  episode_len_mean: 285.01
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.850099999999983
  episode_reward_min: -5.129999999999935
  episodes_this_iter: 2
  episodes_total: 4018
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.196285028378649
          cur_lr: 5.000000000000001e-05
          entropy: 1.6291856898201837
          entropy_coeff: 0.009999999999999998
          kl: 0.016885129470934068
          policy_loss: 0.06669523335165448
          total_loss: 0.06074432945913739
          vf_explained_var: -0.3300429582595825
          vf_loss: 0.007026656703803585
    num_agent_steps_sampled: 1072000
    num_agent_steps_trained: 1072000
    num_steps_sampled: 1072000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1072,27706.5,1072000,-2.8501,-1.96,-5.13,285.01




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1073000
  custom_metrics: {}
  date: 2021-11-05_20-14-46
  done: false
  episode_len_mean: 288.36
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.8835999999999817
  episode_reward_min: -5.129999999999935
  episodes_this_iter: 3
  episodes_total: 4021
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.196285028378649
          cur_lr: 5.000000000000001e-05
          entropy: 1.3461507585313586
          entropy_coeff: 0.009999999999999998
          kl: 0.011829212483193055
          policy_loss: 0.045247098141246375
          total_loss: 0.045699283480644226
          vf_explained_var: -0.19156625866889954
          vf_loss: 0.01159179448958538
    num_agent_steps_sampled: 1073000
    num_agent_steps_trained: 1073000
    num_steps_sampled: 1073000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1073,27745.2,1073000,-2.8836,-1.96,-5.13,288.36


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1074000
  custom_metrics: {}
  date: 2021-11-05_20-15-09
  done: false
  episode_len_mean: 290.64
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.9063999999999823
  episode_reward_min: -5.129999999999935
  episodes_this_iter: 4
  episodes_total: 4025
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.196285028378649
          cur_lr: 5.000000000000001e-05
          entropy: 1.1088757157325744
          entropy_coeff: 0.009999999999999998
          kl: 0.008930518184083308
          policy_loss: -0.004850285003582636
          total_loss: 0.001075516723924213
          vf_explained_var: 0.23182418942451477
          vf_loss: 0.015261631893614928
    num_agent_steps_sampled: 1074000
    num_agent_steps_trained: 1074000
    num_steps_sampled: 1074000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1074,27767.9,1074000,-2.9064,-1.96,-5.13,290.64


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1075000
  custom_metrics: {}
  date: 2021-11-05_20-15-31
  done: false
  episode_len_mean: 292.83
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.9282999999999815
  episode_reward_min: -5.129999999999935
  episodes_this_iter: 3
  episodes_total: 4028
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.196285028378649
          cur_lr: 5.000000000000001e-05
          entropy: 1.0015461805793975
          entropy_coeff: 0.009999999999999998
          kl: 0.0057882624452056395
          policy_loss: 0.07635563148392571
          total_loss: 0.07624050610595279
          vf_explained_var: -0.04999462887644768
          vf_loss: 0.008764189308567438
    num_agent_steps_sampled: 1075000
    num_agent_steps_trained: 1075000
    num_steps_sampled: 1075000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1075,27790.6,1075000,-2.9283,-1.96,-5.13,292.83


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1076000
  custom_metrics: {}
  date: 2021-11-05_20-15-51
  done: false
  episode_len_mean: 295.22
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.9521999999999817
  episode_reward_min: -5.129999999999935
  episodes_this_iter: 3
  episodes_total: 4031
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.196285028378649
          cur_lr: 5.000000000000001e-05
          entropy: 1.1326580766174528
          entropy_coeff: 0.009999999999999998
          kl: 0.013010649314663938
          policy_loss: -0.09270464115672641
          total_loss: -0.0868680382768313
          vf_explained_var: 0.2237469106912613
          vf_loss: 0.014609389043309622
    num_agent_steps_sampled: 1076000
    num_agent_steps_trained: 1076000
    num_steps_sampled: 1076000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1076,27810.3,1076000,-2.9522,-1.96,-5.13,295.22


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1077000
  custom_metrics: {}
  date: 2021-11-05_20-16-11
  done: false
  episode_len_mean: 299.44
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.99439999999998
  episode_reward_min: -5.129999999999935
  episodes_this_iter: 3
  episodes_total: 4034
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.196285028378649
          cur_lr: 5.000000000000001e-05
          entropy: 1.2845270587338342
          entropy_coeff: 0.009999999999999998
          kl: 0.009094195136550492
          policy_loss: 0.03745416402816772
          total_loss: 0.03809696353144116
          vf_explained_var: -0.14475703239440918
          vf_loss: 0.01170301680394914
    num_agent_steps_sampled: 1077000
    num_agent_steps_trained: 1077000
    num_steps_sampled: 1077000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1077,27830.3,1077000,-2.9944,-1.96,-5.13,299.44


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1078000
  custom_metrics: {}
  date: 2021-11-05_20-16-32
  done: false
  episode_len_mean: 301.23
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -3.0122999999999798
  episode_reward_min: -5.129999999999935
  episodes_this_iter: 3
  episodes_total: 4037
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.196285028378649
          cur_lr: 5.000000000000001e-05
          entropy: 1.0502621481815975
          entropy_coeff: 0.009999999999999998
          kl: 0.012044147637896982
          policy_loss: -0.0901808003998465
          total_loss: -0.08330950451393922
          vf_explained_var: 0.26225385069847107
          vf_loss: 0.015009833096216122
    num_agent_steps_sampled: 1078000
    num_agent_steps_trained: 1078000
    num_steps_sampled: 1078000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1078,27850.9,1078000,-3.0123,-1.96,-5.13,301.23


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1079000
  custom_metrics: {}
  date: 2021-11-05_20-16-53
  done: false
  episode_len_mean: 304.76
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -3.0475999999999788
  episode_reward_min: -5.129999999999935
  episodes_this_iter: 4
  episodes_total: 4041
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.196285028378649
          cur_lr: 5.000000000000001e-05
          entropy: 0.7786830392148759
          entropy_coeff: 0.009999999999999998
          kl: 0.020093711323375826
          policy_loss: 0.03988256346848276
          total_loss: 0.05070961655841933
          vf_explained_var: 0.20885223150253296
          vf_loss: 0.014669790263805124
    num_agent_steps_sampled: 1079000
    num_agent_steps_trained: 1079000
    num_steps_sampled: 1079000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1079,27872.4,1079000,-3.0476,-1.96,-5.13,304.76


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1080000
  custom_metrics: {}
  date: 2021-11-05_20-17-11
  done: false
  episode_len_mean: 307.07
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -3.0706999999999782
  episode_reward_min: -5.129999999999935
  episodes_this_iter: 2
  episodes_total: 4043
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29442754256797343
          cur_lr: 5.000000000000001e-05
          entropy: 1.4956623315811157
          entropy_coeff: 0.009999999999999998
          kl: 0.014058422266418605
          policy_loss: -0.09467047916518317
          total_loss: -0.09180662284294765
          vf_explained_var: -0.2969376742839813
          vf_loss: 0.01368129213547541
    num_agent_steps_sampled: 1080000
    num_agent_steps_trained: 1080000
    num_steps_sampled: 1080000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1080,27889.9,1080000,-3.0707,-1.96,-5.13,307.07


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1081000
  custom_metrics: {}
  date: 2021-11-05_20-17-31
  done: false
  episode_len_mean: 310.95
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -3.1094999999999775
  episode_reward_min: -5.129999999999935
  episodes_this_iter: 3
  episodes_total: 4046
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29442754256797343
          cur_lr: 5.000000000000001e-05
          entropy: 1.2910941984918383
          entropy_coeff: 0.009999999999999998
          kl: 0.013060915175261477
          policy_loss: 0.011496532956759136
          total_loss: 0.015787349475754633
          vf_explained_var: -0.3099638521671295
          vf_loss: 0.01335626558931027
    num_agent_steps_sampled: 1081000
    num_agent_steps_trained: 1081000
    num_steps_sampled: 1081000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1081,27910.5,1081000,-3.1095,-1.96,-5.13,310.95




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1082000
  custom_metrics: {}
  date: 2021-11-05_20-18-10
  done: false
  episode_len_mean: 313.3
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -3.132999999999977
  episode_reward_min: -5.129999999999935
  episodes_this_iter: 4
  episodes_total: 4050
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29442754256797343
          cur_lr: 5.000000000000001e-05
          entropy: 0.858908595972591
          entropy_coeff: 0.009999999999999998
          kl: 0.007114273876370369
          policy_loss: 0.04219350351227654
          total_loss: 0.04935499197906918
          vf_explained_var: 0.26169151067733765
          vf_loss: 0.013655936304065917
    num_agent_steps_sampled: 1082000
    num_agent_steps_trained: 1082000
    num_steps_sampled: 1082000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1082,27949.3,1082000,-3.133,-1.96,-5.13,313.3


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1083000
  custom_metrics: {}
  date: 2021-11-05_20-18-35
  done: false
  episode_len_mean: 314.5
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -3.1449999999999765
  episode_reward_min: -5.129999999999935
  episodes_this_iter: 4
  episodes_total: 4054
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29442754256797343
          cur_lr: 5.000000000000001e-05
          entropy: 0.6028981977038913
          entropy_coeff: 0.009999999999999998
          kl: 0.005449843490827217
          policy_loss: 0.02952258379922973
          total_loss: 0.03776465654373169
          vf_explained_var: 0.33086010813713074
          vf_loss: 0.01266647269949317
    num_agent_steps_sampled: 1083000
    num_agent_steps_trained: 1083000
    num_steps_sampled: 1083000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1083,27974.2,1083000,-3.145,-1.96,-5.13,314.5


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1084000
  custom_metrics: {}
  date: 2021-11-05_20-18-58
  done: false
  episode_len_mean: 316.13
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -3.1612999999999767
  episode_reward_min: -5.129999999999935
  episodes_this_iter: 3
  episodes_total: 4057
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29442754256797343
          cur_lr: 5.000000000000001e-05
          entropy: 1.2482543494966296
          entropy_coeff: 0.009999999999999998
          kl: 0.00746510497422403
          policy_loss: -0.008058134218056996
          total_loss: -0.007381335563129849
          vf_explained_var: -0.014108965173363686
          vf_loss: 0.010961406339063413
    num_agent_steps_sampled: 1084000
    num_agent_steps_trained: 1084000
    num_steps_sampled: 1084000
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1084,27996.7,1084000,-3.1613,-1.96,-5.13,316.13


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1085000
  custom_metrics: {}
  date: 2021-11-05_20-19-21
  done: false
  episode_len_mean: 318.48
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -3.1847999999999757
  episode_reward_min: -5.129999999999935
  episodes_this_iter: 4
  episodes_total: 4061
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29442754256797343
          cur_lr: 5.000000000000001e-05
          entropy: 0.9653322127130296
          entropy_coeff: 0.009999999999999998
          kl: 0.014951690751379311
          policy_loss: -0.012510796387990316
          total_loss: -0.004588951170444488
          vf_explained_var: 0.10298953950405121
          vf_loss: 0.013172980377243624
    num_agent_steps_sampled: 1085000
    num_agent_steps_trained: 1085000
    num_steps_sampled: 1085000
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1085,28020.1,1085000,-3.1848,-1.99,-5.13,318.48


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1086000
  custom_metrics: {}
  date: 2021-11-05_20-19-43
  done: false
  episode_len_mean: 320.07
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -3.2006999999999755
  episode_reward_min: -5.129999999999935
  episodes_this_iter: 3
  episodes_total: 4064
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29442754256797343
          cur_lr: 5.000000000000001e-05
          entropy: 0.9267644964986377
          entropy_coeff: 0.009999999999999998
          kl: 0.009770058765998929
          policy_loss: -0.0770849828918775
          total_loss: -0.0717111171119743
          vf_explained_var: 0.12455877661705017
          vf_loss: 0.01176493696661459
    num_agent_steps_sampled: 1086000
    num_agent_steps_trained: 1086000
    num_steps_sampled: 1086000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1086,28042.4,1086000,-3.2007,-1.99,-5.13,320.07


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1087000
  custom_metrics: {}
  date: 2021-11-05_20-20-07
  done: false
  episode_len_mean: 322.13
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -3.2212999999999754
  episode_reward_min: -5.129999999999935
  episodes_this_iter: 4
  episodes_total: 4068
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29442754256797343
          cur_lr: 5.000000000000001e-05
          entropy: 0.9576242168744405
          entropy_coeff: 0.009999999999999998
          kl: 0.005838038080916795
          policy_loss: 0.01824100630150901
          total_loss: 0.023747587824861208
          vf_explained_var: 0.10070621967315674
          vf_loss: 0.013363947584811185
    num_agent_steps_sampled: 1087000
    num_agent_steps_trained: 1087000
    num_steps_sampled: 1087000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1087,28065.7,1087000,-3.2213,-1.99,-5.13,322.13


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1088000
  custom_metrics: {}
  date: 2021-11-05_20-20-30
  done: false
  episode_len_mean: 324.01
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -3.240099999999975
  episode_reward_min: -5.129999999999935
  episodes_this_iter: 4
  episodes_total: 4072
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29442754256797343
          cur_lr: 5.000000000000001e-05
          entropy: 1.0260768400298224
          entropy_coeff: 0.009999999999999998
          kl: 0.006606797695599869
          policy_loss: 0.018768070514003435
          total_loss: 0.024216574761602612
          vf_explained_var: 0.10131831467151642
          vf_loss: 0.013764052993307511
    num_agent_steps_sampled: 1088000
    num_agent_steps_trained: 1088000
    num_steps_sampled: 1088000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1088,28088.8,1088000,-3.2401,-1.99,-5.13,324.01


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1089000
  custom_metrics: {}
  date: 2021-11-05_20-20-54
  done: false
  episode_len_mean: 324.57
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -3.2456999999999736
  episode_reward_min: -5.129999999999935
  episodes_this_iter: 3
  episodes_total: 4075
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29442754256797343
          cur_lr: 5.000000000000001e-05
          entropy: 0.7466471711794536
          entropy_coeff: 0.009999999999999998
          kl: 0.004714443634316763
          policy_loss: -0.08872383998499976
          total_loss: -0.08491282314062118
          vf_explained_var: 0.12950356304645538
          vf_loss: 0.009889425890934136
    num_agent_steps_sampled: 1089000
    num_agent_steps_trained: 1089000
    num_steps_sampled: 1089000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1089,28113.1,1089000,-3.2457,-1.99,-5.13,324.57


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1090000
  custom_metrics: {}
  date: 2021-11-05_20-21-19
  done: false
  episode_len_mean: 323.81
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -3.2380999999999758
  episode_reward_min: -5.129999999999935
  episodes_this_iter: 4
  episodes_total: 4079
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14721377128398672
          cur_lr: 5.000000000000001e-05
          entropy: 0.9448617339134217
          entropy_coeff: 0.009999999999999998
          kl: 0.013290123014053279
          policy_loss: -0.050032528820965025
          total_loss: -0.044334513942400615
          vf_explained_var: 0.15634365379810333
          vf_loss: 0.01319014226189918
    num_agent_steps_sampled: 1090000
    num_agent_steps_trained: 1090000
    num_steps_sampled: 1090000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1090,28137.7,1090000,-3.2381,-1.99,-5.13,323.81




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1091000
  custom_metrics: {}
  date: 2021-11-05_20-22-02
  done: false
  episode_len_mean: 321.71
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -3.217099999999976
  episode_reward_min: -5.129999999999935
  episodes_this_iter: 4
  episodes_total: 4083
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14721377128398672
          cur_lr: 5.000000000000001e-05
          entropy: 0.8627740793757969
          entropy_coeff: 0.009999999999999998
          kl: 0.012060558268709245
          policy_loss: -0.11100098076793882
          total_loss: -0.10108647073308627
          vf_explained_var: 0.1366586983203888
          vf_loss: 0.01676676880257825
    num_agent_steps_sampled: 1091000
    num_agent_steps_trained: 1091000
    num_steps_sampled: 1091000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1091,28181.3,1091000,-3.2171,-1.99,-5.13,321.71


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1092000
  custom_metrics: {}
  date: 2021-11-05_20-22-27
  done: false
  episode_len_mean: 318.52
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -3.185199999999976
  episode_reward_min: -5.129999999999935
  episodes_this_iter: 4
  episodes_total: 4087
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14721377128398672
          cur_lr: 5.000000000000001e-05
          entropy: 0.8380636208587222
          entropy_coeff: 0.009999999999999998
          kl: 0.009339709418813886
          policy_loss: -0.056979982554912566
          total_loss: -0.051168888890080984
          vf_explained_var: 0.09405144304037094
          vf_loss: 0.012816794806470473
    num_agent_steps_sampled: 1092000
    num_agent_steps_trained: 1092000
    num_steps_sampled: 1092000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1092,28205.5,1092000,-3.1852,-1.99,-5.13,318.52


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1093000
  custom_metrics: {}
  date: 2021-11-05_20-22-52
  done: false
  episode_len_mean: 317.49
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -3.1748999999999756
  episode_reward_min: -5.129999999999935
  episodes_this_iter: 4
  episodes_total: 4091
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14721377128398672
          cur_lr: 5.000000000000001e-05
          entropy: 0.7618170036209955
          entropy_coeff: 0.009999999999999998
          kl: 0.005642487745945522
          policy_loss: -0.029348959690994686
          total_loss: -0.024184765832291708
          vf_explained_var: 0.1457059681415558
          vf_loss: 0.011951708659115766
    num_agent_steps_sampled: 1093000
    num_agent_steps_trained: 1093000
    num_steps_sampled: 1093000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1093,28230.7,1093000,-3.1749,-1.99,-5.13,317.49


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1094000
  custom_metrics: {}
  date: 2021-11-05_20-23-18
  done: false
  episode_len_mean: 308.17
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -3.081699999999978
  episode_reward_min: -5.129999999999935
  episodes_this_iter: 4
  episodes_total: 4095
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14721377128398672
          cur_lr: 5.000000000000001e-05
          entropy: 0.5864994270933999
          entropy_coeff: 0.009999999999999998
          kl: 0.013472422583665239
          policy_loss: -0.11807223343186908
          total_loss: -0.1063350530134307
          vf_explained_var: 0.05847249552607536
          vf_loss: 0.015618851118617588
    num_agent_steps_sampled: 1094000
    num_agent_steps_trained: 1094000
    num_steps_sampled: 1094000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1094,28256.7,1094000,-3.0817,-1.99,-5.13,308.17


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1095000
  custom_metrics: {}
  date: 2021-11-05_20-23-43
  done: false
  episode_len_mean: 298.42
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.984199999999981
  episode_reward_min: -5.129999999999935
  episodes_this_iter: 5
  episodes_total: 4100
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14721377128398672
          cur_lr: 5.000000000000001e-05
          entropy: 0.6773334539598889
          entropy_coeff: 0.009999999999999998
          kl: 0.006380921421549917
          policy_loss: 0.024739240606625874
          total_loss: 0.03181590520673328
          vf_explained_var: 0.07557212561368942
          vf_loss: 0.012910636172940334
    num_agent_steps_sampled: 1095000
    num_agent_steps_trained: 1095000
    num_steps_sampled: 1095000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1095,28282,1095000,-2.9842,-1.99,-5.13,298.42


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1096000
  custom_metrics: {}
  date: 2021-11-05_20-24-09
  done: false
  episode_len_mean: 292.57
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.9256999999999813
  episode_reward_min: -5.129999999999935
  episodes_this_iter: 4
  episodes_total: 4104
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14721377128398672
          cur_lr: 5.000000000000001e-05
          entropy: 0.7710010310014089
          entropy_coeff: 0.009999999999999998
          kl: 0.0038393370515355705
          policy_loss: 0.040654177798165214
          total_loss: 0.045022695511579516
          vf_explained_var: 0.08602187037467957
          vf_loss: 0.011513327889972263
    num_agent_steps_sampled: 1096000
    num_agent_steps_trained: 1096000
    num_steps_sampled: 1096000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1096,28308.2,1096000,-2.9257,-1.99,-5.13,292.57


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1097000
  custom_metrics: {}
  date: 2021-11-05_20-24-37
  done: false
  episode_len_mean: 288.47
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.8846999999999823
  episode_reward_min: -5.129999999999935
  episodes_this_iter: 4
  episodes_total: 4108
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07360688564199336
          cur_lr: 5.000000000000001e-05
          entropy: 0.671305740541882
          entropy_coeff: 0.009999999999999998
          kl: 0.010707605492730466
          policy_loss: 0.021525475051667957
          total_loss: 0.026686895224783156
          vf_explained_var: 0.02739999070763588
          vf_loss: 0.01108632322607769
    num_agent_steps_sampled: 1097000
    num_agent_steps_trained: 1097000
    num_steps_sampled: 1097000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1097,28335.9,1097000,-2.8847,-1.99,-5.13,288.47




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1098000
  custom_metrics: {}
  date: 2021-11-05_20-25-23
  done: false
  episode_len_mean: 285.41
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.854099999999983
  episode_reward_min: -5.129999999999935
  episodes_this_iter: 4
  episodes_total: 4112
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07360688564199336
          cur_lr: 5.000000000000001e-05
          entropy: 0.4694870932234658
          entropy_coeff: 0.009999999999999998
          kl: 0.004834747032881006
          policy_loss: -0.027061053324076864
          total_loss: -0.01838412433862686
          vf_explained_var: 0.15603020787239075
          vf_loss: 0.013015929609537125
    num_agent_steps_sampled: 1098000
    num_agent_steps_trained: 1098000
    num_steps_sampled: 1098000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1098,28381.3,1098000,-2.8541,-1.99,-5.13,285.41


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1099000
  custom_metrics: {}
  date: 2021-11-05_20-25-49
  done: false
  episode_len_mean: 275.52
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.755199999999985
  episode_reward_min: -4.519999999999948
  episodes_this_iter: 5
  episodes_total: 4117
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03680344282099668
          cur_lr: 5.000000000000001e-05
          entropy: 0.4952389664120144
          entropy_coeff: 0.009999999999999998
          kl: 0.016364484789085684
          policy_loss: -0.00648949287003941
          total_loss: 0.0067889507446024155
          vf_explained_var: 0.13332009315490723
          vf_loss: 0.017628564240617886
    num_agent_steps_sampled: 1099000
    num_agent_steps_trained: 1099000
    num_steps_sampled: 1099000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1099,28408,1099000,-2.7552,-1.99,-4.52,275.52


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1100000
  custom_metrics: {}
  date: 2021-11-05_20-26-14
  done: false
  episode_len_mean: 271.35
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.713499999999986
  episode_reward_min: -4.509999999999948
  episodes_this_iter: 4
  episodes_total: 4121
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03680344282099668
          cur_lr: 5.000000000000001e-05
          entropy: 0.6702331079377069
          entropy_coeff: 0.009999999999999998
          kl: 0.03916759852393075
          policy_loss: -0.022042989979187647
          total_loss: -0.012865079111523098
          vf_explained_var: 0.11380172520875931
          vf_loss: 0.014438736790584192
    num_agent_steps_sampled: 1100000
    num_agent_steps_trained: 1100000
    num_steps_sampled: 1100000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1100,28432.3,1100000,-2.7135,-2.08,-4.51,271.35


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1101000
  custom_metrics: {}
  date: 2021-11-05_20-26-39
  done: false
  episode_len_mean: 270.1
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.700999999999986
  episode_reward_min: -4.509999999999948
  episodes_this_iter: 3
  episodes_total: 4124
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05520516423149502
          cur_lr: 5.000000000000001e-05
          entropy: 0.6852938638793098
          entropy_coeff: 0.009999999999999998
          kl: 0.022501412403600637
          policy_loss: -0.10995963944329155
          total_loss: -0.1008944797847006
          vf_explained_var: 0.12223133444786072
          vf_loss: 0.014675905431310336
    num_agent_steps_sampled: 1101000
    num_agent_steps_trained: 1101000
    num_steps_sampled: 1101000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1101,28457.3,1101000,-2.701,-2.08,-4.51,270.1


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1102000
  custom_metrics: {}
  date: 2021-11-05_20-27-06
  done: false
  episode_len_mean: 268.25
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.682499999999987
  episode_reward_min: -4.509999999999948
  episodes_this_iter: 5
  episodes_total: 4129
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08280774634724253
          cur_lr: 5.000000000000001e-05
          entropy: 0.3434233299560017
          entropy_coeff: 0.009999999999999998
          kl: 0.010626697542079771
          policy_loss: -0.004721088086565336
          total_loss: 0.004689746846755345
          vf_explained_var: 0.1718308925628662
          vf_loss: 0.011965092747575706
    num_agent_steps_sampled: 1102000
    num_agent_steps_trained: 1102000
    num_steps_sampled: 1102000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1102,28484.2,1102000,-2.6825,-2.08,-4.51,268.25


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1103000
  custom_metrics: {}
  date: 2021-11-05_20-27-32
  done: false
  episode_len_mean: 264.69
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.6468999999999876
  episode_reward_min: -4.509999999999948
  episodes_this_iter: 4
  episodes_total: 4133
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08280774634724253
          cur_lr: 5.000000000000001e-05
          entropy: 0.3681077480316162
          entropy_coeff: 0.009999999999999998
          kl: 0.008102276215481336
          policy_loss: 0.059932629929648505
          total_loss: 0.06741519657274088
          vf_explained_var: 0.19673994183540344
          vf_loss: 0.01049271331479152
    num_agent_steps_sampled: 1103000
    num_agent_steps_trained: 1103000
    num_steps_sampled: 1103000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1103,28510.4,1103000,-2.6469,-2.08,-4.51,264.69


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1104000
  custom_metrics: {}
  date: 2021-11-05_20-27-59
  done: false
  episode_len_mean: 260.87
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.6086999999999874
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 4
  episodes_total: 4137
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08280774634724253
          cur_lr: 5.000000000000001e-05
          entropy: 0.44022223816977607
          entropy_coeff: 0.009999999999999998
          kl: 0.011809309665502513
          policy_loss: -0.009135726880696084
          total_loss: 0.0012383049560917747
          vf_explained_var: 0.20420525968074799
          vf_loss: 0.013798349971572558
    num_agent_steps_sampled: 1104000
    num_agent_steps_trained: 1104000
    num_steps_sampled: 1104000
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1104,28537.3,1104000,-2.6087,-2.08,-4.09,260.87




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1105000
  custom_metrics: {}
  date: 2021-11-05_20-28-43
  done: false
  episode_len_mean: 255.77
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.5576999999999894
  episode_reward_min: -3.8199999999999625
  episodes_this_iter: 5
  episodes_total: 4142
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08280774634724253
          cur_lr: 5.000000000000001e-05
          entropy: 0.528984346985817
          entropy_coeff: 0.009999999999999998
          kl: 0.008582594756001703
          policy_loss: -0.03986520717541377
          total_loss: -0.027979333202044168
          vf_explained_var: 0.2997289299964905
          vf_loss: 0.01646501265673174
    num_agent_steps_sampled: 1105000
    num_agent_steps_trained: 1105000
    num_steps_sampled: 1105000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1105,28581.1,1105000,-2.5577,-2.08,-3.82,255.77


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1106000
  custom_metrics: {}
  date: 2021-11-05_20-29-08
  done: false
  episode_len_mean: 251.12
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.5111999999999903
  episode_reward_min: -3.5999999999999672
  episodes_this_iter: 4
  episodes_total: 4146
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08280774634724253
          cur_lr: 5.000000000000001e-05
          entropy: 0.7679091771443685
          entropy_coeff: 0.009999999999999998
          kl: 0.010117058596708261
          policy_loss: 0.004558383093939887
          total_loss: 0.006811337835258908
          vf_explained_var: 0.533454179763794
          vf_loss: 0.009094276315429145
    num_agent_steps_sampled: 1106000
    num_agent_steps_trained: 1106000
    num_steps_sampled: 1106000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1106,28606.4,1106000,-2.5112,-2.08,-3.6,251.12


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1107000
  custom_metrics: {}
  date: 2021-11-05_20-29-32
  done: false
  episode_len_mean: 250.68
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.5067999999999904
  episode_reward_min: -3.3599999999999723
  episodes_this_iter: 3
  episodes_total: 4149
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08280774634724253
          cur_lr: 5.000000000000001e-05
          entropy: 0.7031609349780612
          entropy_coeff: 0.009999999999999998
          kl: 0.01059913793115028
          policy_loss: -0.11697190677126248
          total_loss: -0.11130163470904032
          vf_explained_var: 0.2514524459838867
          vf_loss: 0.011824189721503191
    num_agent_steps_sampled: 1107000
    num_agent_steps_trained: 1107000
    num_steps_sampled: 1107000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1107,28630.2,1107000,-2.5068,-2.08,-3.36,250.68


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1108000
  custom_metrics: {}
  date: 2021-11-05_20-29-55
  done: false
  episode_len_mean: 251.61
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.51609999999999
  episode_reward_min: -3.3599999999999723
  episodes_this_iter: 4
  episodes_total: 4153
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08280774634724253
          cur_lr: 5.000000000000001e-05
          entropy: 0.8991262230608198
          entropy_coeff: 0.009999999999999998
          kl: 0.023169427399135144
          policy_loss: 0.023793101641866897
          total_loss: 0.027977227005693647
          vf_explained_var: 0.24901023507118225
          vf_loss: 0.011256779885540406
    num_agent_steps_sampled: 1108000
    num_agent_steps_trained: 1108000
    num_steps_sampled: 1108000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1108,28653.4,1108000,-2.5161,-2.08,-3.36,251.61


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1109000
  custom_metrics: {}
  date: 2021-11-05_20-30-20
  done: false
  episode_len_mean: 250.63
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.50629999999999
  episode_reward_min: -3.3599999999999723
  episodes_this_iter: 4
  episodes_total: 4157
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12421161952086379
          cur_lr: 5.000000000000001e-05
          entropy: 0.9395799044105742
          entropy_coeff: 0.009999999999999998
          kl: 0.02988843739780395
          policy_loss: -0.019551185104582043
          total_loss: -0.013624372167719735
          vf_explained_var: 0.22214363515377045
          vf_loss: 0.011610123613435362
    num_agent_steps_sampled: 1109000
    num_agent_steps_trained: 1109000
    num_steps_sampled: 1109000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1109,28678,1109000,-2.5063,-2.08,-3.36,250.63


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1110000
  custom_metrics: {}
  date: 2021-11-05_20-30-43
  done: false
  episode_len_mean: 250.48
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.5047999999999906
  episode_reward_min: -3.3599999999999723
  episodes_this_iter: 4
  episodes_total: 4161
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1863174292812957
          cur_lr: 5.000000000000001e-05
          entropy: 1.1062747855981192
          entropy_coeff: 0.009999999999999998
          kl: 0.013572354614046213
          policy_loss: 0.015535416081547737
          total_loss: 0.019351230478949016
          vf_explained_var: 0.10151306539773941
          vf_loss: 0.012349796046813329
    num_agent_steps_sampled: 1110000
    num_agent_steps_trained: 1110000
    num_steps_sampled: 1110000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1110,28701.3,1110000,-2.5048,-2.08,-3.36,250.48


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1111000
  custom_metrics: {}
  date: 2021-11-05_20-31-08
  done: false
  episode_len_mean: 249.74
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.4973999999999905
  episode_reward_min: -3.3599999999999723
  episodes_this_iter: 3
  episodes_total: 4164
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1863174292812957
          cur_lr: 5.000000000000001e-05
          entropy: 0.7541390478610992
          entropy_coeff: 0.009999999999999998
          kl: 0.010271679367266667
          policy_loss: -0.07889266394906574
          total_loss: -0.073853673454788
          vf_explained_var: 0.3519514501094818
          vf_loss: 0.010666589449263282
    num_agent_steps_sampled: 1111000
    num_agent_steps_trained: 1111000
    num_steps_sampled: 1111000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1111,28725.9,1111000,-2.4974,-2.08,-3.36,249.74


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1112000
  custom_metrics: {}
  date: 2021-11-05_20-31-32
  done: false
  episode_len_mean: 248.92
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.4891999999999905
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 4168
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1863174292812957
          cur_lr: 5.000000000000001e-05
          entropy: 0.6508394055896335
          entropy_coeff: 0.009999999999999998
          kl: 0.007474716880145612
          policy_loss: -0.09091135048203998
          total_loss: -0.0822785657313135
          vf_explained_var: 0.1533380001783371
          vf_loss: 0.013748504174873232
    num_agent_steps_sampled: 1112000
    num_agent_steps_trained: 1112000
    num_steps_sampled: 1112000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1112,28750,1112000,-2.4892,-2.08,-3.35,248.92




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1113000
  custom_metrics: {}
  date: 2021-11-05_20-32-15
  done: false
  episode_len_mean: 247.74
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.477399999999991
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 5
  episodes_total: 4173
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1863174292812957
          cur_lr: 5.000000000000001e-05
          entropy: 0.5620934999651379
          entropy_coeff: 0.009999999999999998
          kl: 0.00466195186260197
          policy_loss: -0.011695328396227625
          total_loss: -0.0042292411542601055
          vf_explained_var: 0.3008195161819458
          vf_loss: 0.012218420476549202
    num_agent_steps_sampled: 1113000
    num_agent_steps_trained: 1113000
    num_steps_sampled: 1113000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1113,28793.4,1113000,-2.4774,-2.08,-3.35,247.74


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1114000
  custom_metrics: {}
  date: 2021-11-05_20-32-42
  done: false
  episode_len_mean: 246.03
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.4602999999999913
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 4177
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09315871464064784
          cur_lr: 5.000000000000001e-05
          entropy: 0.4597627411286036
          entropy_coeff: 0.009999999999999998
          kl: 0.007442094843918312
          policy_loss: 0.04630331214931276
          total_loss: 0.05331257374750243
          vf_explained_var: 0.0827663242816925
          vf_loss: 0.010913594408581655
    num_agent_steps_sampled: 1114000
    num_agent_steps_trained: 1114000
    num_steps_sampled: 1114000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1114,28820,1114000,-2.4603,-2.08,-3.35,246.03


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1115000
  custom_metrics: {}
  date: 2021-11-05_20-33-08
  done: false
  episode_len_mean: 245.59
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.4558999999999913
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 4181
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09315871464064784
          cur_lr: 5.000000000000001e-05
          entropy: 0.49551508559121027
          entropy_coeff: 0.009999999999999998
          kl: 0.004752515824103456
          policy_loss: 0.016125197956959405
          total_loss: 0.023371267980999418
          vf_explained_var: 0.020621394738554955
          vf_loss: 0.011758480935047071
    num_agent_steps_sampled: 1115000
    num_agent_steps_trained: 1115000
    num_steps_sampled: 1115000
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1115,28846.5,1115000,-2.4559,-2.08,-3.35,245.59


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1116000
  custom_metrics: {}
  date: 2021-11-05_20-33-35
  done: false
  episode_len_mean: 244.81
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.4480999999999913
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 4185
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04657935732032392
          cur_lr: 5.000000000000001e-05
          entropy: 0.5169584350453482
          entropy_coeff: 0.009999999999999998
          kl: 0.007011194906005357
          policy_loss: -0.08980321288108825
          total_loss: -0.08304260816011164
          vf_explained_var: 0.17112421989440918
          vf_loss: 0.011603612535529667
    num_agent_steps_sampled: 1116000
    num_agent_steps_trained: 1116000
    num_steps_sampled: 1116000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1116,28873.3,1116000,-2.4481,-2.08,-3.35,244.81


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1117000
  custom_metrics: {}
  date: 2021-11-05_20-34-01
  done: false
  episode_len_mean: 244.28
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.442799999999992
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 5
  episodes_total: 4190
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04657935732032392
          cur_lr: 5.000000000000001e-05
          entropy: 0.5512671944167878
          entropy_coeff: 0.009999999999999998
          kl: 0.007925822353137171
          policy_loss: -0.006772146249810855
          total_loss: 0.002786392511592971
          vf_explained_var: 0.06766990572214127
          vf_loss: 0.014702030747301049
    num_agent_steps_sampled: 1117000
    num_agent_steps_trained: 1117000
    num_steps_sampled: 1117000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1117,28898.7,1117000,-2.4428,-2.08,-3.35,244.28


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1118000
  custom_metrics: {}
  date: 2021-11-05_20-34-28
  done: false
  episode_len_mean: 243.58
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.435799999999992
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 4194
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04657935732032392
          cur_lr: 5.000000000000001e-05
          entropy: 0.36165155669053395
          entropy_coeff: 0.009999999999999998
          kl: 0.0020519180166532784
          policy_loss: 0.024301104992628098
          total_loss: 0.03166695311665535
          vf_explained_var: 0.07719837874174118
          vf_loss: 0.010886789641032617
    num_agent_steps_sampled: 1118000
    num_agent_steps_trained: 1118000
    num_steps_sampled: 1118000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1118,28925.8,1118000,-2.4358,-2.08,-3.35,243.58


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1119000
  custom_metrics: {}
  date: 2021-11-05_20-34-55
  done: false
  episode_len_mean: 243.22
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.432199999999992
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 4198
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02328967866016196
          cur_lr: 5.000000000000001e-05
          entropy: 0.4078698244359758
          entropy_coeff: 0.009999999999999998
          kl: 0.0031082928274712017
          policy_loss: -0.0030237514939573077
          total_loss: 0.0045511966778172385
          vf_explained_var: 0.06906959414482117
          vf_loss: 0.011581251221812434
    num_agent_steps_sampled: 1119000
    num_agent_steps_trained: 1119000
    num_steps_sampled: 1119000
    num_steps_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1119,28952.7,1119000,-2.4322,-2.08,-3.35,243.22




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1120000
  custom_metrics: {}
  date: 2021-11-05_20-35-40
  done: false
  episode_len_mean: 242.71
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.4270999999999923
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 5
  episodes_total: 4203
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01164483933008098
          cur_lr: 5.000000000000001e-05
          entropy: 0.44964352349440256
          entropy_coeff: 0.009999999999999998
          kl: 0.009642780673157207
          policy_loss: -0.014640876319673327
          total_loss: -0.003887594160106447
          vf_explained_var: 0.1198372170329094
          vf_loss: 0.015137424847731987
    num_agent_steps_sampled: 1120000
    num_agent_steps_trained: 1120000
    num_steps_sampled: 1120000
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1120,28997.7,1120000,-2.4271,-2.08,-3.35,242.71


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1121000
  custom_metrics: {}
  date: 2021-11-05_20-36-09
  done: false
  episode_len_mean: 242.29
  episode_media: {}
  episode_reward_max: -2.0799999999999996
  episode_reward_mean: -2.4228999999999923
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 4207
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01164483933008098
          cur_lr: 5.000000000000001e-05
          entropy: 0.5393389119042291
          entropy_coeff: 0.009999999999999998
          kl: 0.005854819581516324
          policy_loss: 0.017466164463096196
          total_loss: 0.022875984758138658
          vf_explained_var: 0.24085235595703125
          vf_loss: 0.010735027367870013
    num_agent_steps_sampled: 1121000
    num_agent_steps_trained: 1121000
    num_steps_sampled: 1121000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1121,29026.8,1121000,-2.4229,-2.08,-3.35,242.29


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1122000
  custom_metrics: {}
  date: 2021-11-05_20-36-36
  done: false
  episode_len_mean: 242.42
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.424199999999992
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 4211
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01164483933008098
          cur_lr: 5.000000000000001e-05
          entropy: 0.42094981537924875
          entropy_coeff: 0.009999999999999998
          kl: 0.0075623469818854655
          policy_loss: -0.04437541349066628
          total_loss: -0.03544684698184331
          vf_explained_var: 0.034118592739105225
          vf_loss: 0.013050004850245184
    num_agent_steps_sampled: 1122000
    num_agent_steps_trained: 1122000
    num_steps_sampled: 1122000
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1122,29053.6,1122000,-2.4242,-2.12,-3.35,242.42


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1123000
  custom_metrics: {}
  date: 2021-11-05_20-37-02
  done: false
  episode_len_mean: 242.49
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.4248999999999925
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 5
  episodes_total: 4216
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01164483933008098
          cur_lr: 5.000000000000001e-05
          entropy: 0.6008783529202143
          entropy_coeff: 0.009999999999999998
          kl: 0.015183591583698922
          policy_loss: -0.019857162568304273
          total_loss: -0.01194723571340243
          vf_explained_var: 0.268551230430603
          vf_loss: 0.013741898428027829
    num_agent_steps_sampled: 1123000
    num_agent_steps_trained: 1123000
    num_steps_sampled: 1123000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1123,29079.9,1123000,-2.4249,-2.12,-3.35,242.49


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1124000
  custom_metrics: {}
  date: 2021-11-05_20-37-29
  done: false
  episode_len_mean: 241.81
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.418099999999992
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 4220
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01164483933008098
          cur_lr: 5.000000000000001e-05
          entropy: 0.5271756331125895
          entropy_coeff: 0.009999999999999998
          kl: 0.021625700430983368
          policy_loss: -0.02450335832933585
          total_loss: -0.016309566216336356
          vf_explained_var: 0.13133171200752258
          vf_loss: 0.013213716809534364
    num_agent_steps_sampled: 1124000
    num_agent_steps_trained: 1124000
    num_steps_sampled: 1124000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1124,29106.6,1124000,-2.4181,-2.12,-3.35,241.81


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1125000
  custom_metrics: {}
  date: 2021-11-05_20-37-54
  done: false
  episode_len_mean: 241.5
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.4149999999999925
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 4224
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01746725899512147
          cur_lr: 5.000000000000001e-05
          entropy: 0.8282611668109894
          entropy_coeff: 0.009999999999999998
          kl: 0.012738547382895425
          policy_loss: -0.00045315399765968325
          total_loss: 0.0033783571587668523
          vf_explained_var: 0.13704834878444672
          vf_loss: 0.011891616808457506
    num_agent_steps_sampled: 1125000
    num_agent_steps_trained: 1125000
    num_steps_sampled: 1125000
    num_steps_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1125,29131.6,1125000,-2.415,-2.12,-3.35,241.5


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1126000
  custom_metrics: {}
  date: 2021-11-05_20-38-17
  done: false
  episode_len_mean: 242.78
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.427799999999992
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 4228
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01746725899512147
          cur_lr: 5.000000000000001e-05
          entropy: 0.9648531185256111
          entropy_coeff: 0.009999999999999998
          kl: 0.0156861677341488
          policy_loss: 0.018951443831125896
          total_loss: 0.020023750596576267
          vf_explained_var: 0.03739705681800842
          vf_loss: 0.010446845921170381
    num_agent_steps_sampled: 1126000
    num_agent_steps_trained: 1126000
    num_steps_sampled: 1126000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1126,29155.2,1126000,-2.4278,-2.12,-3.35,242.78




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1127000
  custom_metrics: {}
  date: 2021-11-05_20-39-02
  done: false
  episode_len_mean: 242.64
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.426399999999992
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 4232
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01746725899512147
          cur_lr: 5.000000000000001e-05
          entropy: 0.577262196275923
          entropy_coeff: 0.009999999999999998
          kl: 0.009068136563696545
          policy_loss: 0.017247039824724197
          total_loss: 0.023669559756914774
          vf_explained_var: 0.14016495645046234
          vf_loss: 0.012036746378160185
    num_agent_steps_sampled: 1127000
    num_agent_steps_trained: 1127000
    num_steps_sampled: 1127000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1127,29200.3,1127000,-2.4264,-2.12,-3.35,242.64


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1128000
  custom_metrics: {}
  date: 2021-11-05_20-39-29
  done: false
  episode_len_mean: 242.41
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.424099999999992
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 4236
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01746725899512147
          cur_lr: 5.000000000000001e-05
          entropy: 0.4615162778231833
          entropy_coeff: 0.009999999999999998
          kl: 0.015853332781837697
          policy_loss: -0.06462418143120077
          total_loss: -0.05666258877350224
          vf_explained_var: 0.08886786550283432
          vf_loss: 0.01229983710994323
    num_agent_steps_sampled: 1128000
    num_agent_steps_trained: 1128000
    num_steps_sampled: 1128000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1128,29226.5,1128000,-2.4241,-2.12,-3.35,242.41


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1129000
  custom_metrics: {}
  date: 2021-11-05_20-39-53
  done: false
  episode_len_mean: 243.65
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.4364999999999917
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 4240
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01746725899512147
          cur_lr: 5.000000000000001e-05
          entropy: 1.1150814685556623
          entropy_coeff: 0.009999999999999998
          kl: 0.07409419774045681
          policy_loss: 0.003978995978832245
          total_loss: 0.006811031533612145
          vf_explained_var: 0.19156667590141296
          vf_loss: 0.012688626783589523
    num_agent_steps_sampled: 1129000
    num_agent_steps_trained: 1129000
    num_steps_sampled: 1129000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1129,29250.5,1129000,-2.4365,-2.12,-3.35,243.65


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1130000
  custom_metrics: {}
  date: 2021-11-05_20-40-16
  done: false
  episode_len_mean: 244.21
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.442099999999992
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 4244
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02620088849268221
          cur_lr: 5.000000000000001e-05
          entropy: 1.0160572826862335
          entropy_coeff: 0.009999999999999998
          kl: 0.010013436182673353
          policy_loss: 0.010143804964092043
          total_loss: 0.012721212208271026
          vf_explained_var: 0.0809032991528511
          vf_loss: 0.012475624473558532
    num_agent_steps_sampled: 1130000
    num_agent_steps_trained: 1130000
    num_steps_sampled: 1130000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1130,29274.2,1130000,-2.4421,-2.12,-3.35,244.21


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1131000
  custom_metrics: {}
  date: 2021-11-05_20-40-40
  done: false
  episode_len_mean: 245.34
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.4533999999999914
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 4248
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02620088849268221
          cur_lr: 5.000000000000001e-05
          entropy: 1.1425125704871284
          entropy_coeff: 0.009999999999999998
          kl: 0.031660989276239504
          policy_loss: 0.002052386850118637
          total_loss: 0.0044210496048132574
          vf_explained_var: 0.14098571240901947
          vf_loss: 0.012964241858571767
    num_agent_steps_sampled: 1131000
    num_agent_steps_trained: 1131000
    num_steps_sampled: 1131000
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1131,29297.8,1131000,-2.4534,-2.12,-3.35,245.34


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1132000
  custom_metrics: {}
  date: 2021-11-05_20-41-04
  done: false
  episode_len_mean: 244.48
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.4447999999999914
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 3
  episodes_total: 4251
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.039301332739023316
          cur_lr: 5.000000000000001e-05
          entropy: 1.0160654445489248
          entropy_coeff: 0.009999999999999998
          kl: 0.009671802370315893
          policy_loss: -0.0019117688139279683
          total_loss: -0.0025150924921035767
          vf_explained_var: -0.03591112047433853
          vf_loss: 0.009177217034933467
    num_agent_steps_sampled: 1132000
    num_agent_steps_trained: 1132000
    num_steps_sampled: 1132000
    num_ste

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1132,29321.3,1132000,-2.4448,-2.12,-3.35,244.48


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1133000
  custom_metrics: {}
  date: 2021-11-05_20-41-26
  done: false
  episode_len_mean: 245.76
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.4575999999999913
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 4255
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.039301332739023316
          cur_lr: 5.000000000000001e-05
          entropy: 0.9864324225319756
          entropy_coeff: 0.009999999999999998
          kl: 0.0277025556337303
          policy_loss: 0.011831193168958028
          total_loss: 0.015210391167137358
          vf_explained_var: 0.17371876537799835
          vf_loss: 0.01215477402632435
    num_agent_steps_sampled: 1133000
    num_agent_steps_trained: 1133000
    num_steps_sampled: 1133000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1133,29343.6,1133000,-2.4576,-2.12,-3.35,245.76


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1134000
  custom_metrics: {}
  date: 2021-11-05_20-41-48
  done: false
  episode_len_mean: 246.2
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.4619999999999913
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 3
  episodes_total: 4258
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.058951999108534985
          cur_lr: 5.000000000000001e-05
          entropy: 1.037557805246777
          entropy_coeff: 0.009999999999999998
          kl: 0.015299063076426558
          policy_loss: -0.10202849060297012
          total_loss: -0.09910882843865289
          vf_explained_var: 0.07347403466701508
          vf_loss: 0.012393332148591677
    num_agent_steps_sampled: 1134000
    num_agent_steps_trained: 1134000
    num_steps_sampled: 1134000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1134,29366.1,1134000,-2.462,-2.12,-3.35,246.2




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1135000
  custom_metrics: {}
  date: 2021-11-05_20-42-28
  done: false
  episode_len_mean: 247.09
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.4708999999999914
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 4262
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.058951999108534985
          cur_lr: 5.000000000000001e-05
          entropy: 1.056257329384486
          entropy_coeff: 0.009999999999999998
          kl: 0.012848825310077435
          policy_loss: 0.030286094794670742
          total_loss: 0.032987171825435424
          vf_explained_var: 0.05905897915363312
          vf_loss: 0.01250618551340368
    num_agent_steps_sampled: 1135000
    num_agent_steps_trained: 1135000
    num_steps_sampled: 1135000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1135,29405.7,1135000,-2.4709,-2.12,-3.32,247.09


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1136000
  custom_metrics: {}
  date: 2021-11-05_20-42-50
  done: false
  episode_len_mean: 247.23
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.4722999999999913
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 3
  episodes_total: 4265
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.058951999108534985
          cur_lr: 5.000000000000001e-05
          entropy: 0.9048602296246423
          entropy_coeff: 0.009999999999999998
          kl: 0.02120382063125482
          policy_loss: -0.10577719410260518
          total_loss: -0.10074634133941597
          vf_explained_var: 0.07681460678577423
          vf_loss: 0.012829448738031917
    num_agent_steps_sampled: 1136000
    num_agent_steps_trained: 1136000
    num_steps_sampled: 1136000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1136,29428.1,1136000,-2.4723,-2.12,-3.32,247.23


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1137000
  custom_metrics: {}
  date: 2021-11-05_20-43-13
  done: false
  episode_len_mean: 248.23
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.482299999999991
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 4269
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08842799866280247
          cur_lr: 5.000000000000001e-05
          entropy: 0.7884410202503205
          entropy_coeff: 0.009999999999999998
          kl: 0.01783543176732765
          policy_loss: -0.0019786476261085934
          total_loss: 0.004349145458804236
          vf_explained_var: 0.06889670342206955
          vf_loss: 0.012635055815594064
    num_agent_steps_sampled: 1137000
    num_agent_steps_trained: 1137000
    num_steps_sampled: 1137000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1137,29450.6,1137000,-2.4823,-2.12,-3.32,248.23


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1138000
  custom_metrics: {}
  date: 2021-11-05_20-43-37
  done: false
  episode_len_mean: 249.44
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.4943999999999904
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 4273
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08842799866280247
          cur_lr: 5.000000000000001e-05
          entropy: 0.7787038538191053
          entropy_coeff: 0.009999999999999998
          kl: 0.006830735363176959
          policy_loss: 0.05578323486778471
          total_loss: 0.05962853448258506
          vf_explained_var: 0.1013464629650116
          vf_loss: 0.011028310739331776
    num_agent_steps_sampled: 1138000
    num_agent_steps_trained: 1138000
    num_steps_sampled: 1138000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1138,29474.9,1138000,-2.4944,-2.12,-3.32,249.44


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1139000
  custom_metrics: {}
  date: 2021-11-05_20-44-03
  done: false
  episode_len_mean: 249.97
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.499699999999991
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 4277
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08842799866280247
          cur_lr: 5.000000000000001e-05
          entropy: 0.8012101378705766
          entropy_coeff: 0.009999999999999998
          kl: 0.011982128912379784
          policy_loss: 0.025105136384566625
          total_loss: 0.03168306781185998
          vf_explained_var: 0.08598547428846359
          vf_loss: 0.013530475211640199
    num_agent_steps_sampled: 1139000
    num_agent_steps_trained: 1139000
    num_steps_sampled: 1139000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1139,29500.2,1139000,-2.4997,-2.12,-3.32,249.97


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1140000
  custom_metrics: {}
  date: 2021-11-05_20-44-26
  done: false
  episode_len_mean: 251.33
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.5132999999999903
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 4281
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08842799866280247
          cur_lr: 5.000000000000001e-05
          entropy: 0.7870249642266167
          entropy_coeff: 0.009999999999999998
          kl: 0.0061628918812494425
          policy_loss: -0.0027989159027735394
          total_loss: 0.002579685714509752
          vf_explained_var: 0.09842827916145325
          vf_loss: 0.012703879757059946
    num_agent_steps_sampled: 1140000
    num_agent_steps_trained: 1140000
    num_steps_sampled: 1140000
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1140,29523.6,1140000,-2.5133,-2.12,-3.32,251.33


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1141000
  custom_metrics: {}
  date: 2021-11-05_20-44-51
  done: false
  episode_len_mean: 252.16
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.52159999999999
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 4285
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08842799866280247
          cur_lr: 5.000000000000001e-05
          entropy: 0.8751756171385447
          entropy_coeff: 0.009999999999999998
          kl: 0.007485789378279466
          policy_loss: 0.027336419125398
          total_loss: 0.032723869383335116
          vf_explained_var: 0.04643957316875458
          vf_loss: 0.013477251368264357
    num_agent_steps_sampled: 1141000
    num_agent_steps_trained: 1141000
    num_steps_sampled: 1141000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1141,29548.6,1141000,-2.5216,-2.12,-3.32,252.16


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1142000
  custom_metrics: {}
  date: 2021-11-05_20-45-15
  done: false
  episode_len_mean: 252.94
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.52939999999999
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 3
  episodes_total: 4288
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08842799866280247
          cur_lr: 5.000000000000001e-05
          entropy: 0.871853361527125
          entropy_coeff: 0.009999999999999998
          kl: 0.013100736594371772
          policy_loss: 0.006391444636715783
          total_loss: 0.008352398210101657
          vf_explained_var: 0.03768995776772499
          vf_loss: 0.009521008505382471
    num_agent_steps_sampled: 1142000
    num_agent_steps_trained: 1142000
    num_steps_sampled: 1142000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1142,29572.4,1142000,-2.5294,-2.12,-3.32,252.94




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1143000
  custom_metrics: {}
  date: 2021-11-05_20-45-57
  done: false
  episode_len_mean: 254.05
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.5404999999999895
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 4292
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08842799866280247
          cur_lr: 5.000000000000001e-05
          entropy: 0.9187858879566193
          entropy_coeff: 0.009999999999999998
          kl: 0.007855241129989091
          policy_loss: 0.000269867479801178
          total_loss: 0.004991656045118967
          vf_explained_var: 0.11627955734729767
          vf_loss: 0.013215019626335965
    num_agent_steps_sampled: 1143000
    num_agent_steps_trained: 1143000
    num_steps_sampled: 1143000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1143,29614.5,1143000,-2.5405,-2.12,-3.32,254.05


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1144000
  custom_metrics: {}
  date: 2021-11-05_20-46-22
  done: false
  episode_len_mean: 254.74
  episode_media: {}
  episode_reward_max: -2.1199999999999988
  episode_reward_mean: -2.54739999999999
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 4296
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08842799866280247
          cur_lr: 5.000000000000001e-05
          entropy: 0.9215348468886482
          entropy_coeff: 0.009999999999999998
          kl: 0.008118748310858475
          policy_loss: 0.013096792871753376
          total_loss: 0.018653749922911324
          vf_explained_var: 0.05649952217936516
          vf_loss: 0.01405438252000345
    num_agent_steps_sampled: 1144000
    num_agent_steps_trained: 1144000
    num_steps_sampled: 1144000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1144,29638.9,1144000,-2.5474,-2.12,-3.32,254.74


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1145000
  custom_metrics: {}
  date: 2021-11-05_20-46-46
  done: false
  episode_len_mean: 256.1
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.5609999999999893
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 4300
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08842799866280247
          cur_lr: 5.000000000000001e-05
          entropy: 0.8895926766925388
          entropy_coeff: 0.009999999999999998
          kl: 0.0110409597845318
          policy_loss: 0.023766391310426925
          total_loss: 0.02947614391644796
          vf_explained_var: 0.08018700033426285
          vf_loss: 0.01362934701351656
    num_agent_steps_sampled: 1145000
    num_agent_steps_trained: 1145000
    num_steps_sampled: 1145000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1145,29663.7,1145000,-2.561,-2.22,-3.32,256.1


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1146000
  custom_metrics: {}
  date: 2021-11-05_20-47-11
  done: false
  episode_len_mean: 257.11
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.5710999999999893
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 4304
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08842799866280247
          cur_lr: 5.000000000000001e-05
          entropy: 0.904643342230055
          entropy_coeff: 0.009999999999999998
          kl: 0.01196800649976741
          policy_loss: 0.015687448614173465
          total_loss: 0.02246062747306294
          vf_explained_var: 0.10857359319925308
          vf_loss: 0.01476130343766676
    num_agent_steps_sampled: 1146000
    num_agent_steps_trained: 1146000
    num_steps_sampled: 1146000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1146,29688.1,1146000,-2.5711,-2.22,-3.32,257.11


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1147000
  custom_metrics: {}
  date: 2021-11-05_20-47-35
  done: false
  episode_len_mean: 257.95
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.5794999999999892
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 4308
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08842799866280247
          cur_lr: 5.000000000000001e-05
          entropy: 0.9402209454112582
          entropy_coeff: 0.009999999999999998
          kl: 0.005704576012638856
          policy_loss: 0.030580495711829928
          total_loss: 0.036998561686939664
          vf_explained_var: 0.11460061371326447
          vf_loss: 0.015315829176041814
    num_agent_steps_sampled: 1147000
    num_agent_steps_trained: 1147000
    num_steps_sampled: 1147000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1147,29712.7,1147000,-2.5795,-2.22,-3.32,257.95


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1148000
  custom_metrics: {}
  date: 2021-11-05_20-47-58
  done: false
  episode_len_mean: 259.22
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.5921999999999885
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 3
  episodes_total: 4311
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08842799866280247
          cur_lr: 5.000000000000001e-05
          entropy: 1.0186921444204118
          entropy_coeff: 0.009999999999999998
          kl: 0.010005066764358932
          policy_loss: -0.09887489908271366
          total_loss: -0.09124389547440741
          vf_explained_var: 0.033059682697057724
          vf_loss: 0.01693319868710306
    num_agent_steps_sampled: 1148000
    num_agent_steps_trained: 1148000
    num_steps_sampled: 1148000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1148,29735.6,1148000,-2.5922,-2.22,-3.32,259.22


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1149000
  custom_metrics: {}
  date: 2021-11-05_20-48-22
  done: false
  episode_len_mean: 260.31
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.603099999999989
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 4315
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08842799866280247
          cur_lr: 5.000000000000001e-05
          entropy: 0.8928570946057638
          entropy_coeff: 0.009999999999999998
          kl: 0.013210276205122303
          policy_loss: -0.09875080461303393
          total_loss: -0.08626296809977956
          vf_explained_var: 0.02598751336336136
          vf_loss: 0.020248249504301284
    num_agent_steps_sampled: 1149000
    num_agent_steps_trained: 1149000
    num_steps_sampled: 1149000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1149,29759.7,1149000,-2.6031,-2.22,-3.32,260.31


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1150000
  custom_metrics: {}
  date: 2021-11-05_20-48-48
  done: false
  episode_len_mean: 261.58
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.6157999999999877
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 4319
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08842799866280247
          cur_lr: 5.000000000000001e-05
          entropy: 0.864620773659812
          entropy_coeff: 0.009999999999999998
          kl: 0.012786606958566833
          policy_loss: -0.06299130270878474
          total_loss: -0.0553691399594148
          vf_explained_var: 0.09060300141572952
          vf_loss: 0.015137674680186643
    num_agent_steps_sampled: 1150000
    num_agent_steps_trained: 1150000
    num_steps_sampled: 1150000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1150,29784.7,1150000,-2.6158,-2.22,-3.32,261.58




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1151000
  custom_metrics: {}
  date: 2021-11-05_20-49-31
  done: false
  episode_len_mean: 261.32
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.613199999999988
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 4323
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08842799866280247
          cur_lr: 5.000000000000001e-05
          entropy: 0.9126967973179287
          entropy_coeff: 0.009999999999999998
          kl: 0.019895126352399158
          policy_loss: -0.05920728589925501
          total_loss: -0.053354662656784055
          vf_explained_var: 0.3003856837749481
          vf_loss: 0.013220303169348174
    num_agent_steps_sampled: 1151000
    num_agent_steps_trained: 1151000
    num_steps_sampled: 1151000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1151,29828.6,1151000,-2.6132,-2.22,-3.32,261.32


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1152000
  custom_metrics: {}
  date: 2021-11-05_20-49-56
  done: false
  episode_len_mean: 261.24
  episode_media: {}
  episode_reward_max: -2.2199999999999966
  episode_reward_mean: -2.612399999999988
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 4327
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08842799866280247
          cur_lr: 5.000000000000001e-05
          entropy: 0.9132614817884234
          entropy_coeff: 0.009999999999999998
          kl: 0.018668210490729267
          policy_loss: 0.012430676569541296
          total_loss: 0.01772082522511482
          vf_explained_var: 0.031469088047742844
          vf_loss: 0.012771967322462136
    num_agent_steps_sampled: 1152000
    num_agent_steps_trained: 1152000
    num_steps_sampled: 1152000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1152,29852.9,1152000,-2.6124,-2.22,-3.32,261.24


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1153000
  custom_metrics: {}
  date: 2021-11-05_20-50-20
  done: false
  episode_len_mean: 261.58
  episode_media: {}
  episode_reward_max: -2.2299999999999964
  episode_reward_mean: -2.6157999999999877
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 4331
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08842799866280247
          cur_lr: 5.000000000000001e-05
          entropy: 1.0019107924567328
          entropy_coeff: 0.009999999999999998
          kl: 0.04017333551531029
          policy_loss: -0.11902970986233817
          total_loss: -0.10960474726226595
          vf_explained_var: -0.05999315530061722
          vf_loss: 0.0158916218413247
    num_agent_steps_sampled: 1153000
    num_agent_steps_trained: 1153000
    num_steps_sampled: 1153000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1153,29877.5,1153000,-2.6158,-2.23,-3.32,261.58


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1154000
  custom_metrics: {}
  date: 2021-11-05_20-50-43
  done: false
  episode_len_mean: 263.5
  episode_media: {}
  episode_reward_max: -2.2299999999999964
  episode_reward_mean: -2.6349999999999874
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 4
  episodes_total: 4335
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13264199799420373
          cur_lr: 5.000000000000001e-05
          entropy: 1.0068290889263154
          entropy_coeff: 0.009999999999999998
          kl: 0.008973647400108812
          policy_loss: 0.021308077871799468
          total_loss: 0.0253101979692777
          vf_explained_var: 0.07462412863969803
          vf_loss: 0.012880128818667597
    num_agent_steps_sampled: 1154000
    num_agent_steps_trained: 1154000
    num_steps_sampled: 1154000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1154,29899.9,1154000,-2.635,-2.23,-3.32,263.5


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1155000
  custom_metrics: {}
  date: 2021-11-05_20-51-04
  done: false
  episode_len_mean: 264.84
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.648399999999987
  episode_reward_min: -3.319999999999973
  episodes_this_iter: 3
  episodes_total: 4338
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13264199799420373
          cur_lr: 5.000000000000001e-05
          entropy: 1.0848684423499637
          entropy_coeff: 0.009999999999999998
          kl: 0.008965576841364647
          policy_loss: 0.01544765465789371
          total_loss: 0.014648254877991146
          vf_explained_var: -0.2875404953956604
          vf_loss: 0.00886006926076435
    num_agent_steps_sampled: 1155000
    num_agent_steps_trained: 1155000
    num_steps_sampled: 1155000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1155,29921.1,1155000,-2.6484,-2.24,-3.32,264.84


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1156000
  custom_metrics: {}
  date: 2021-11-05_20-51-25
  done: false
  episode_len_mean: 267.0
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.669999999999986
  episode_reward_min: -3.429999999999971
  episodes_this_iter: 3
  episodes_total: 4341
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13264199799420373
          cur_lr: 5.000000000000001e-05
          entropy: 1.1389887107743157
          entropy_coeff: 0.009999999999999998
          kl: 0.013794863392765915
          policy_loss: -0.008460721042421128
          total_loss: -0.008279572096135881
          vf_explained_var: -0.15535150468349457
          vf_loss: 0.009741258793575172
    num_agent_steps_sampled: 1156000
    num_agent_steps_trained: 1156000
    num_steps_sampled: 1156000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1156,29941.6,1156000,-2.67,-2.24,-3.43,267


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1157000
  custom_metrics: {}
  date: 2021-11-05_20-51-47
  done: false
  episode_len_mean: 267.4
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.6739999999999866
  episode_reward_min: -3.429999999999971
  episodes_this_iter: 4
  episodes_total: 4345
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13264199799420373
          cur_lr: 5.000000000000001e-05
          entropy: 0.9936144948005676
          entropy_coeff: 0.009999999999999998
          kl: 0.013676843759148365
          policy_loss: 0.004309989760319392
          total_loss: 0.010289427720838122
          vf_explained_var: 0.07100961357355118
          vf_loss: 0.014101456850767136
    num_agent_steps_sampled: 1157000
    num_agent_steps_trained: 1157000
    num_steps_sampled: 1157000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1157,29964.3,1157000,-2.674,-2.24,-3.43,267.4


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1158000
  custom_metrics: {}
  date: 2021-11-05_20-52-08
  done: false
  episode_len_mean: 269.14
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.691399999999986
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 3
  episodes_total: 4348
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13264199799420373
          cur_lr: 5.000000000000001e-05
          entropy: 1.0909186926152972
          entropy_coeff: 0.009999999999999998
          kl: 0.018667442616067436
          policy_loss: 0.09713981747627258
          total_loss: 0.09455329726139704
          vf_explained_var: -0.23055782914161682
          vf_loss: 0.005846578016644344
    num_agent_steps_sampled: 1158000
    num_agent_steps_trained: 1158000
    num_steps_sampled: 1158000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1158,29985,1158000,-2.6914,-2.24,-4.09,269.14




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1159000
  custom_metrics: {}
  date: 2021-11-05_20-52-51
  done: false
  episode_len_mean: 268.07
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.680699999999986
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 4
  episodes_total: 4352
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13264199799420373
          cur_lr: 5.000000000000001e-05
          entropy: 0.9740698847505781
          entropy_coeff: 0.009999999999999998
          kl: 0.0113070567440859
          policy_loss: 0.007091315504577425
          total_loss: 0.014777565043833521
          vf_explained_var: 0.04105269908905029
          vf_loss: 0.015927158037407532
    num_agent_steps_sampled: 1159000
    num_agent_steps_trained: 1159000
    num_steps_sampled: 1159000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1159,30028.4,1159000,-2.6807,-2.24,-4.09,268.07


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1160000
  custom_metrics: {}
  date: 2021-11-05_20-53-17
  done: false
  episode_len_mean: 266.4
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.6639999999999873
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 4
  episodes_total: 4356
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13264199799420373
          cur_lr: 5.000000000000001e-05
          entropy: 0.8866199453671774
          entropy_coeff: 0.009999999999999998
          kl: 0.008013406320647773
          policy_loss: 0.005707622898949517
          total_loss: 0.013263972848653794
          vf_explained_var: 0.04203111678361893
          vf_loss: 0.015359635040577914
    num_agent_steps_sampled: 1160000
    num_agent_steps_trained: 1160000
    num_steps_sampled: 1160000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1160,30053.5,1160000,-2.664,-2.24,-4.09,266.4


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1161000
  custom_metrics: {}
  date: 2021-11-05_20-53-41
  done: false
  episode_len_mean: 265.18
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.6517999999999877
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 4
  episodes_total: 4360
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13264199799420373
          cur_lr: 5.000000000000001e-05
          entropy: 0.9125539071030087
          entropy_coeff: 0.009999999999999998
          kl: 0.00892234815050254
          policy_loss: 0.016457292520337636
          total_loss: 0.024713055044412614
          vf_explained_var: 0.04631159454584122
          vf_loss: 0.016197821290956604
    num_agent_steps_sampled: 1161000
    num_agent_steps_trained: 1161000
    num_steps_sampled: 1161000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1161,30077.6,1161000,-2.6518,-2.24,-4.09,265.18


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1162000
  custom_metrics: {}
  date: 2021-11-05_20-54-04
  done: false
  episode_len_mean: 265.31
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.653099999999987
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 4
  episodes_total: 4364
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13264199799420373
          cur_lr: 5.000000000000001e-05
          entropy: 0.9534359667036268
          entropy_coeff: 0.009999999999999998
          kl: 0.009099325865907362
          policy_loss: 0.008961846596664852
          total_loss: 0.017543888671530616
          vf_explained_var: 0.021993782371282578
          vf_loss: 0.0169094519275758
    num_agent_steps_sampled: 1162000
    num_agent_steps_trained: 1162000
    num_steps_sampled: 1162000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1162,30100.9,1162000,-2.6531,-2.24,-4.09,265.31


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1163000
  custom_metrics: {}
  date: 2021-11-05_20-54-28
  done: false
  episode_len_mean: 264.57
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.645699999999987
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 3
  episodes_total: 4367
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13264199799420373
          cur_lr: 5.000000000000001e-05
          entropy: 0.9015504247612424
          entropy_coeff: 0.009999999999999998
          kl: 0.006477199461407679
          policy_loss: -0.10455052306254704
          total_loss: -0.09641605665286382
          vf_explained_var: 0.06652630865573883
          vf_loss: 0.01629082353837374
    num_agent_steps_sampled: 1163000
    num_agent_steps_trained: 1163000
    num_steps_sampled: 1163000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1163,30125.4,1163000,-2.6457,-2.24,-4.09,264.57


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1164000
  custom_metrics: {}
  date: 2021-11-05_20-54-49
  done: false
  episode_len_mean: 265.84
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.6583999999999874
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 4
  episodes_total: 4371
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13264199799420373
          cur_lr: 5.000000000000001e-05
          entropy: 1.124506872230106
          entropy_coeff: 0.009999999999999998
          kl: 0.015052852692224958
          policy_loss: -0.005501893659432729
          total_loss: 0.0011216522090964848
          vf_explained_var: 0.0880344808101654
          vf_loss: 0.015871974411937925
    num_agent_steps_sampled: 1164000
    num_agent_steps_trained: 1164000
    num_steps_sampled: 1164000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1164,30145.5,1164000,-2.6584,-2.24,-4.09,265.84


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1165000
  custom_metrics: {}
  date: 2021-11-05_20-55-08
  done: false
  episode_len_mean: 267.95
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.6794999999999862
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 3
  episodes_total: 4374
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13264199799420373
          cur_lr: 5.000000000000001e-05
          entropy: 1.0799627502759297
          entropy_coeff: 0.009999999999999998
          kl: 0.011511791198806994
          policy_loss: 0.06692340051134428
          total_loss: 0.06846207537584835
          vf_explained_var: -0.31068745255470276
          vf_loss: 0.010811354036235975
    num_agent_steps_sampled: 1165000
    num_agent_steps_trained: 1165000
    num_steps_sampled: 1165000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1165,30165.2,1165000,-2.6795,-2.24,-4.09,267.95


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1166000
  custom_metrics: {}
  date: 2021-11-05_20-55-28
  done: false
  episode_len_mean: 270.67
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.706699999999986
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 3
  episodes_total: 4377
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13264199799420373
          cur_lr: 5.000000000000001e-05
          entropy: 1.0896760238541496
          entropy_coeff: 0.009999999999999998
          kl: 0.012521330514414647
          policy_loss: 0.06732897468739085
          total_loss: 0.06898298669192526
          vf_explained_var: -0.1772691309452057
          vf_loss: 0.010889915443678748
    num_agent_steps_sampled: 1166000
    num_agent_steps_trained: 1166000
    num_steps_sampled: 1166000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1166,30184.9,1166000,-2.7067,-2.24,-4.09,270.67




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1167000
  custom_metrics: {}
  date: 2021-11-05_20-56-08
  done: false
  episode_len_mean: 271.45
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.7144999999999855
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 3
  episodes_total: 4380
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13264199799420373
          cur_lr: 5.000000000000001e-05
          entropy: 1.064642107486725
          entropy_coeff: 0.009999999999999998
          kl: 0.017733327281936162
          policy_loss: 0.013633578187889524
          total_loss: 0.017453898323906793
          vf_explained_var: -0.34467172622680664
          vf_loss: 0.012114557085765733
    num_agent_steps_sampled: 1167000
    num_agent_steps_trained: 1167000
    num_steps_sampled: 1167000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1167,30224.9,1167000,-2.7145,-2.02,-4.09,271.45


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1168000
  custom_metrics: {}
  date: 2021-11-05_20-56-35
  done: false
  episode_len_mean: 270.12
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.7011999999999863
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 4
  episodes_total: 4384
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13264199799420373
          cur_lr: 5.000000000000001e-05
          entropy: 0.7549992905722724
          entropy_coeff: 0.009999999999999998
          kl: 0.009991090773647057
          policy_loss: -0.07000111192464828
          total_loss: -0.061759722895092434
          vf_explained_var: 0.1346433460712433
          vf_loss: 0.014466142147365543
    num_agent_steps_sampled: 1168000
    num_agent_steps_trained: 1168000
    num_steps_sampled: 1168000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1168,30251.3,1168000,-2.7012,-2.02,-4.09,270.12


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1169000
  custom_metrics: {}
  date: 2021-11-05_20-57-00
  done: false
  episode_len_mean: 269.23
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.692299999999986
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 5
  episodes_total: 4389
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13264199799420373
          cur_lr: 5.000000000000001e-05
          entropy: 0.7891415721840329
          entropy_coeff: 0.009999999999999998
          kl: 0.007532329326713397
          policy_loss: -0.010370153850979275
          total_loss: 0.0001236099335882399
          vf_explained_var: 0.06376953423023224
          vf_loss: 0.017386079600287808
    num_agent_steps_sampled: 1169000
    num_agent_steps_trained: 1169000
    num_steps_sampled: 1169000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1169,30276.8,1169000,-2.6923,-2.02,-4.09,269.23


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1170000
  custom_metrics: {}
  date: 2021-11-05_20-57-26
  done: false
  episode_len_mean: 268.75
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.687499999999986
  episode_reward_min: -4.089999999999957
  episodes_this_iter: 4
  episodes_total: 4393
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13264199799420373
          cur_lr: 5.000000000000001e-05
          entropy: 0.8952607459492153
          entropy_coeff: 0.009999999999999998
          kl: 0.02493107664067714
          policy_loss: -0.0044876294003592596
          total_loss: 0.001990301658709844
          vf_explained_var: 0.12197522819042206
          vf_loss: 0.012123630920218096
    num_agent_steps_sampled: 1170000
    num_agent_steps_trained: 1170000
    num_steps_sampled: 1170000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1170,30302.6,1170000,-2.6875,-2.02,-4.09,268.75


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1171000
  custom_metrics: {}
  date: 2021-11-05_20-57-43
  done: false
  episode_len_mean: 270.34
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.703399999999986
  episode_reward_min: -4.3599999999999515
  episodes_this_iter: 2
  episodes_total: 4395
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19896299699130554
          cur_lr: 5.000000000000001e-05
          entropy: 1.1980550097094642
          entropy_coeff: 0.009999999999999998
          kl: 0.03019765620510889
          policy_loss: -0.06407313098510106
          total_loss: -0.05973787771330939
          vf_explained_var: 0.06550492346286774
          vf_loss: 0.010307592821643792
    num_agent_steps_sampled: 1171000
    num_agent_steps_trained: 1171000
    num_steps_sampled: 1171000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1171,30319.8,1171000,-2.7034,-2.02,-4.36,270.34


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1172000
  custom_metrics: {}
  date: 2021-11-05_20-57-58
  done: false
  episode_len_mean: 274.91
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.7490999999999857
  episode_reward_min: -5.139999999999935
  episodes_this_iter: 2
  episodes_total: 4397
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29844449548695823
          cur_lr: 5.000000000000001e-05
          entropy: 1.3711792733934192
          entropy_coeff: 0.009999999999999998
          kl: 0.013958348019657722
          policy_loss: -0.060907436203625466
          total_loss: -0.06332694474193785
          vf_explained_var: -0.09833099693059921
          vf_loss: 0.00712649138974181
    num_agent_steps_sampled: 1172000
    num_agent_steps_trained: 1172000
    num_steps_sampled: 1172000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1172,30334.6,1172000,-2.7491,-2.02,-5.14,274.91


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1173000
  custom_metrics: {}
  date: 2021-11-05_20-58-13
  done: false
  episode_len_mean: 279.45
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.7944999999999847
  episode_reward_min: -5.139999999999935
  episodes_this_iter: 2
  episodes_total: 4399
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29844449548695823
          cur_lr: 5.000000000000001e-05
          entropy: 1.3608039220174153
          entropy_coeff: 0.009999999999999998
          kl: 0.01045112068801662
          policy_loss: -0.0878483817809158
          total_loss: -0.08669044739670223
          vf_explained_var: 0.016383497044444084
          vf_loss: 0.011646893158710252
    num_agent_steps_sampled: 1173000
    num_agent_steps_trained: 1173000
    num_steps_sampled: 1173000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1173,30349.6,1173000,-2.7945,-2.02,-5.14,279.45


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1174000
  custom_metrics: {}
  date: 2021-11-05_20-58-32
  done: false
  episode_len_mean: 282.65
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.826499999999984
  episode_reward_min: -5.139999999999935
  episodes_this_iter: 3
  episodes_total: 4402
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29844449548695823
          cur_lr: 5.000000000000001e-05
          entropy: 1.1359827823109097
          entropy_coeff: 0.009999999999999998
          kl: 0.00829736880447456
          policy_loss: -0.09081651071707407
          total_loss: -0.08356995864046944
          vf_explained_var: 0.013119767419993877
          vf_loss: 0.01613007297532426
    num_agent_steps_sampled: 1174000
    num_agent_steps_trained: 1174000
    num_steps_sampled: 1174000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1174,30369,1174000,-2.8265,-2.02,-5.14,282.65


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1175000
  custom_metrics: {}
  date: 2021-11-05_20-58-51
  done: false
  episode_len_mean: 286.25
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.862499999999983
  episode_reward_min: -5.139999999999935
  episodes_this_iter: 3
  episodes_total: 4405
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29844449548695823
          cur_lr: 5.000000000000001e-05
          entropy: 1.1875228352016873
          entropy_coeff: 0.009999999999999998
          kl: 0.015609255158742188
          policy_loss: -0.1138474088576105
          total_loss: -0.10470098389519586
          vf_explained_var: 0.016723942011594772
          vf_loss: 0.016363155158857504
    num_agent_steps_sampled: 1175000
    num_agent_steps_trained: 1175000
    num_steps_sampled: 1175000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1175,30387.8,1175000,-2.8625,-2.02,-5.14,286.25


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1176000
  custom_metrics: {}
  date: 2021-11-05_20-59-11
  done: false
  episode_len_mean: 288.42
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.8841999999999826
  episode_reward_min: -5.139999999999935
  episodes_this_iter: 3
  episodes_total: 4408
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29844449548695823
          cur_lr: 5.000000000000001e-05
          entropy: 1.171634664800432
          entropy_coeff: 0.009999999999999998
          kl: 0.007294532896881852
          policy_loss: -0.09234938398003578
          total_loss: -0.08742081506384744
          vf_explained_var: 0.048934146761894226
          vf_loss: 0.014467902450511853
    num_agent_steps_sampled: 1176000
    num_agent_steps_trained: 1176000
    num_steps_sampled: 1176000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1176,30407.6,1176000,-2.8842,-2.02,-5.14,288.42




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1177000
  custom_metrics: {}
  date: 2021-11-05_20-59-50
  done: false
  episode_len_mean: 289.17
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.891699999999982
  episode_reward_min: -5.139999999999935
  episodes_this_iter: 4
  episodes_total: 4412
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29844449548695823
          cur_lr: 5.000000000000001e-05
          entropy: 1.1389277319113413
          entropy_coeff: 0.009999999999999998
          kl: 0.008107609923179855
          policy_loss: 0.011306309617227978
          total_loss: 0.01805447753932741
          vf_explained_var: 0.07841034233570099
          vf_loss: 0.015717774536460637
    num_agent_steps_sampled: 1177000
    num_agent_steps_trained: 1177000
    num_steps_sampled: 1177000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1177,30446,1177000,-2.8917,-2.02,-5.14,289.17


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1178000
  custom_metrics: {}
  date: 2021-11-05_21-00-14
  done: false
  episode_len_mean: 289.3
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.892999999999982
  episode_reward_min: -5.139999999999935
  episodes_this_iter: 4
  episodes_total: 4416
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29844449548695823
          cur_lr: 5.000000000000001e-05
          entropy: 0.8812511463960012
          entropy_coeff: 0.009999999999999998
          kl: 0.00757126201942696
          policy_loss: 0.0043861592809359235
          total_loss: 0.01394262057211664
          vf_explained_var: 0.040103256702423096
          vf_loss: 0.016109371713052194
    num_agent_steps_sampled: 1178000
    num_agent_steps_trained: 1178000
    num_steps_sampled: 1178000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1178,30470.4,1178000,-2.893,-2.02,-5.14,289.3


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1179000
  custom_metrics: {}
  date: 2021-11-05_21-00-38
  done: false
  episode_len_mean: 289.56
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.8955999999999817
  episode_reward_min: -5.139999999999935
  episodes_this_iter: 3
  episodes_total: 4419
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29844449548695823
          cur_lr: 5.000000000000001e-05
          entropy: 0.9337157600455814
          entropy_coeff: 0.009999999999999998
          kl: 0.006375433160614187
          policy_loss: -0.11787512964672513
          total_loss: -0.10984478327963088
          vf_explained_var: 0.09178142249584198
          vf_loss: 0.015464789275493886
    num_agent_steps_sampled: 1179000
    num_agent_steps_trained: 1179000
    num_steps_sampled: 1179000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1179,30494.2,1179000,-2.8956,-2.02,-5.14,289.56


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1180000
  custom_metrics: {}
  date: 2021-11-05_21-01-01
  done: false
  episode_len_mean: 291.03
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.9102999999999817
  episode_reward_min: -5.139999999999935
  episodes_this_iter: 4
  episodes_total: 4423
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29844449548695823
          cur_lr: 5.000000000000001e-05
          entropy: 1.033161743481954
          entropy_coeff: 0.009999999999999998
          kl: 0.009531689711327356
          policy_loss: 0.022141474071476194
          total_loss: 0.027744465238518184
          vf_explained_var: 0.05467822402715683
          vf_loss: 0.013089926520155535
    num_agent_steps_sampled: 1180000
    num_agent_steps_trained: 1180000
    num_steps_sampled: 1180000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1180,30517,1180000,-2.9103,-2.02,-5.14,291.03


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1181000
  custom_metrics: {}
  date: 2021-11-05_21-01-24
  done: false
  episode_len_mean: 291.39
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.9138999999999813
  episode_reward_min: -5.139999999999935
  episodes_this_iter: 4
  episodes_total: 4427
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29844449548695823
          cur_lr: 5.000000000000001e-05
          entropy: 1.1498761786354912
          entropy_coeff: 0.009999999999999998
          kl: 0.00562883404371443
          policy_loss: 0.026467826631334092
          total_loss: 0.030906304717063904
          vf_explained_var: 0.08412186056375504
          vf_loss: 0.014257346849060722
    num_agent_steps_sampled: 1181000
    num_agent_steps_trained: 1181000
    num_steps_sampled: 1181000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1181,30540.7,1181000,-2.9139,-2.02,-5.14,291.39


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1182000
  custom_metrics: {}
  date: 2021-11-05_21-01-48
  done: false
  episode_len_mean: 292.63
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.9262999999999812
  episode_reward_min: -5.139999999999935
  episodes_this_iter: 3
  episodes_total: 4430
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29844449548695823
          cur_lr: 5.000000000000001e-05
          entropy: 1.3034550560845268
          entropy_coeff: 0.009999999999999998
          kl: 0.004501522199380285
          policy_loss: 0.03703123049603568
          total_loss: 0.03611674043867323
          vf_explained_var: -0.10308481007814407
          vf_loss: 0.010776601375416956
    num_agent_steps_sampled: 1182000
    num_agent_steps_trained: 1182000
    num_steps_sampled: 1182000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1182,30564,1182000,-2.9263,-2.02,-5.14,292.63


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1183000
  custom_metrics: {}
  date: 2021-11-05_21-02-11
  done: false
  episode_len_mean: 292.07
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.920699999999981
  episode_reward_min: -5.139999999999935
  episodes_this_iter: 4
  episodes_total: 4434
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14922224774347911
          cur_lr: 5.000000000000001e-05
          entropy: 1.1150859554608663
          entropy_coeff: 0.009999999999999998
          kl: 0.011842405902167464
          policy_loss: 0.03448809265262551
          total_loss: 0.03848412041034963
          vf_explained_var: 0.010745508596301079
          vf_loss: 0.013379735566882624
    num_agent_steps_sampled: 1183000
    num_agent_steps_trained: 1183000
    num_steps_sampled: 1183000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1183,30587.7,1183000,-2.9207,-2.02,-5.14,292.07


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1184000
  custom_metrics: {}
  date: 2021-11-05_21-02-35
  done: false
  episode_len_mean: 291.05
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.9104999999999808
  episode_reward_min: -5.139999999999935
  episodes_this_iter: 4
  episodes_total: 4438
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14922224774347911
          cur_lr: 5.000000000000001e-05
          entropy: 1.241245276398129
          entropy_coeff: 0.009999999999999998
          kl: 0.010348483455728936
          policy_loss: 0.046983766555786136
          total_loss: 0.04755823661883672
          vf_explained_var: 0.059118110686540604
          vf_loss: 0.011442696406609483
    num_agent_steps_sampled: 1184000
    num_agent_steps_trained: 1184000
    num_steps_sampled: 1184000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1184,30611.6,1184000,-2.9105,-2.02,-5.14,291.05




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1185000
  custom_metrics: {}
  date: 2021-11-05_21-03-18
  done: false
  episode_len_mean: 288.55
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.8854999999999826
  episode_reward_min: -5.139999999999935
  episodes_this_iter: 4
  episodes_total: 4442
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14922224774347911
          cur_lr: 5.000000000000001e-05
          entropy: 0.9841908944977654
          entropy_coeff: 0.009999999999999998
          kl: 0.009150850220878758
          policy_loss: -0.007816085426343811
          total_loss: 0.00036433421903186373
          vf_explained_var: 0.03624260798096657
          vf_loss: 0.016656819596472715
    num_agent_steps_sampled: 1185000
    num_agent_steps_trained: 1185000
    num_steps_sampled: 1185000
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1185,30654,1185000,-2.8855,-2.02,-5.14,288.55


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1186000
  custom_metrics: {}
  date: 2021-11-05_21-03-42
  done: false
  episode_len_mean: 287.59
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.875899999999982
  episode_reward_min: -5.139999999999935
  episodes_this_iter: 4
  episodes_total: 4446
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14922224774347911
          cur_lr: 5.000000000000001e-05
          entropy: 1.1226523796717325
          entropy_coeff: 0.009999999999999998
          kl: 0.008356332118997046
          policy_loss: 0.019984230481916002
          total_loss: 0.02715001284248299
          vf_explained_var: 0.02068573422729969
          vf_loss: 0.01714535208625926
    num_agent_steps_sampled: 1186000
    num_agent_steps_trained: 1186000
    num_steps_sampled: 1186000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1186,30678.6,1186000,-2.8759,-2.02,-5.14,287.59


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1187000
  custom_metrics: {}
  date: 2021-11-05_21-04-09
  done: false
  episode_len_mean: 286.43
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.864299999999983
  episode_reward_min: -5.139999999999935
  episodes_this_iter: 3
  episodes_total: 4449
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14922224774347911
          cur_lr: 5.000000000000001e-05
          entropy: 1.2745438920127021
          entropy_coeff: 0.009999999999999998
          kl: 0.01365134070903022
          policy_loss: -0.0003513746791415744
          total_loss: -0.00018114646275838215
          vf_explained_var: 0.054335758090019226
          vf_loss: 0.010878581157885491
    num_agent_steps_sampled: 1187000
    num_agent_steps_trained: 1187000
    num_steps_sampled: 1187000
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1187,30705.3,1187000,-2.8643,-2.02,-5.14,286.43


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1188000
  custom_metrics: {}
  date: 2021-11-05_21-04-33
  done: false
  episode_len_mean: 287.43
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.8742999999999825
  episode_reward_min: -5.139999999999935
  episodes_this_iter: 4
  episodes_total: 4453
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14922224774347911
          cur_lr: 5.000000000000001e-05
          entropy: 1.1248185449176364
          entropy_coeff: 0.009999999999999998
          kl: 0.011839198124883174
          policy_loss: 0.012429448382722006
          total_loss: 0.018618182465434074
          vf_explained_var: 0.07955242693424225
          vf_loss: 0.015670245161486997
    num_agent_steps_sampled: 1188000
    num_agent_steps_trained: 1188000
    num_steps_sampled: 1188000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1188,30729.6,1188000,-2.8743,-2.02,-5.14,287.43


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1189000
  custom_metrics: {}
  date: 2021-11-05_21-04-58
  done: false
  episode_len_mean: 287.56
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.875599999999983
  episode_reward_min: -5.139999999999935
  episodes_this_iter: 4
  episodes_total: 4457
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14922224774347911
          cur_lr: 5.000000000000001e-05
          entropy: 0.9699752993053861
          entropy_coeff: 0.009999999999999998
          kl: 0.010553462247760572
          policy_loss: 0.0005747286809815301
          total_loss: 0.007692045304510328
          vf_explained_var: 0.05562414973974228
          vf_loss: 0.015242259783877266
    num_agent_steps_sampled: 1189000
    num_agent_steps_trained: 1189000
    num_steps_sampled: 1189000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1189,30754,1189000,-2.8756,-2.02,-5.14,287.56


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1190000
  custom_metrics: {}
  date: 2021-11-05_21-05-22
  done: false
  episode_len_mean: 287.22
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.8721999999999825
  episode_reward_min: -5.139999999999935
  episodes_this_iter: 4
  episodes_total: 4461
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14922224774347911
          cur_lr: 5.000000000000001e-05
          entropy: 1.049002426200443
          entropy_coeff: 0.009999999999999998
          kl: 0.011835924159463376
          policy_loss: 0.061062048623959225
          total_loss: 0.06385838563243548
          vf_explained_var: 0.0212399885058403
          vf_loss: 0.011520176409329804
    num_agent_steps_sampled: 1190000
    num_agent_steps_trained: 1190000
    num_steps_sampled: 1190000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1190,30777.8,1190000,-2.8722,-2.02,-5.14,287.22


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1191000
  custom_metrics: {}
  date: 2021-11-05_21-05-46
  done: false
  episode_len_mean: 287.4
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.873999999999983
  episode_reward_min: -5.139999999999935
  episodes_this_iter: 4
  episodes_total: 4465
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14922224774347911
          cur_lr: 5.000000000000001e-05
          entropy: 1.190537699063619
          entropy_coeff: 0.009999999999999998
          kl: 0.008149167740507184
          policy_loss: -0.023445169544882245
          total_loss: -0.017638242244720458
          vf_explained_var: 0.06550097465515137
          vf_loss: 0.016496266114215057
    num_agent_steps_sampled: 1191000
    num_agent_steps_trained: 1191000
    num_steps_sampled: 1191000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1191,30802.2,1191000,-2.874,-2.02,-5.14,287.4


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1192000
  custom_metrics: {}
  date: 2021-11-05_21-06-10
  done: false
  episode_len_mean: 287.91
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.8790999999999825
  episode_reward_min: -5.139999999999935
  episodes_this_iter: 3
  episodes_total: 4468
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14922224774347911
          cur_lr: 5.000000000000001e-05
          entropy: 1.273942357963986
          entropy_coeff: 0.009999999999999998
          kl: 0.010847823668901274
          policy_loss: -0.042789629598458605
          total_loss: -0.0422405228846603
          vf_explained_var: 0.05040751397609711
          vf_loss: 0.011669795905860762
    num_agent_steps_sampled: 1192000
    num_agent_steps_trained: 1192000
    num_steps_sampled: 1192000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1192,30826,1192000,-2.8791,-2.02,-5.14,287.91




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1193000
  custom_metrics: {}
  date: 2021-11-05_21-06-52
  done: false
  episode_len_mean: 284.37
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.843699999999983
  episode_reward_min: -5.139999999999935
  episodes_this_iter: 4
  episodes_total: 4472
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14922224774347911
          cur_lr: 5.000000000000001e-05
          entropy: 0.9341992364989387
          entropy_coeff: 0.009999999999999998
          kl: 0.010056205663392999
          policy_loss: -0.05506660557455487
          total_loss: -0.0501864197353522
          vf_explained_var: 0.16482043266296387
          vf_loss: 0.012721568242543273
    num_agent_steps_sampled: 1193000
    num_agent_steps_trained: 1193000
    num_steps_sampled: 1193000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1193,30868,1193000,-2.8437,-2.02,-5.14,284.37


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1194000
  custom_metrics: {}
  date: 2021-11-05_21-07-17
  done: false
  episode_len_mean: 282.3
  episode_media: {}
  episode_reward_max: -2.020000000000001
  episode_reward_mean: -2.8229999999999835
  episode_reward_min: -5.139999999999935
  episodes_this_iter: 4
  episodes_total: 4476
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14922224774347911
          cur_lr: 5.000000000000001e-05
          entropy: 0.9628556191921234
          entropy_coeff: 0.009999999999999998
          kl: 0.0120595146733244
          policy_loss: -0.03338520245419608
          total_loss: -0.026766091088453927
          vf_explained_var: 0.03673303499817848
          vf_loss: 0.014448122359398338
    num_agent_steps_sampled: 1194000
    num_agent_steps_trained: 1194000
    num_steps_sampled: 1194000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1194,30892.6,1194000,-2.823,-2.02,-5.14,282.3


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1195000
  custom_metrics: {}
  date: 2021-11-05_21-07-42
  done: false
  episode_len_mean: 279.8
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.797999999999984
  episode_reward_min: -5.139999999999935
  episodes_this_iter: 4
  episodes_total: 4480
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14922224774347911
          cur_lr: 5.000000000000001e-05
          entropy: 0.9364740312099457
          entropy_coeff: 0.009999999999999998
          kl: 0.009246659543103301
          policy_loss: -0.03183603998687532
          total_loss: -0.025845444823304812
          vf_explained_var: 0.037588976323604584
          vf_loss: 0.01397553144229783
    num_agent_steps_sampled: 1195000
    num_agent_steps_trained: 1195000
    num_steps_sampled: 1195000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1195,30917.5,1195000,-2.798,-2.05,-5.14,279.8


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1196000
  custom_metrics: {}
  date: 2021-11-05_21-08-05
  done: false
  episode_len_mean: 281.1
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.810999999999983
  episode_reward_min: -5.139999999999935
  episodes_this_iter: 4
  episodes_total: 4484
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14922224774347911
          cur_lr: 5.000000000000001e-05
          entropy: 1.0476516385873158
          entropy_coeff: 0.009999999999999998
          kl: 0.011123089885717894
          policy_loss: 0.037696298708518344
          total_loss: 0.03828124838570754
          vf_explained_var: 0.3045393228530884
          vf_loss: 0.00940165426582098
    num_agent_steps_sampled: 1196000
    num_agent_steps_trained: 1196000
    num_steps_sampled: 1196000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1196,30941.2,1196000,-2.811,-2.05,-5.14,281.1


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1197000
  custom_metrics: {}
  date: 2021-11-05_21-08-29
  done: false
  episode_len_mean: 282.28
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.822799999999984
  episode_reward_min: -5.139999999999935
  episodes_this_iter: 4
  episodes_total: 4488
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14922224774347911
          cur_lr: 5.000000000000001e-05
          entropy: 1.0154282814926572
          entropy_coeff: 0.009999999999999998
          kl: 0.009515157747357408
          policy_loss: 0.06786364995770984
          total_loss: 0.06927438005805016
          vf_explained_var: 0.15453340113162994
          vf_loss: 0.01014513773843646
    num_agent_steps_sampled: 1197000
    num_agent_steps_trained: 1197000
    num_steps_sampled: 1197000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1197,30964.8,1197000,-2.8228,-2.05,-5.14,282.28


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1198000
  custom_metrics: {}
  date: 2021-11-05_21-08-54
  done: false
  episode_len_mean: 282.64
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.826399999999984
  episode_reward_min: -5.139999999999935
  episodes_this_iter: 4
  episodes_total: 4492
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14922224774347911
          cur_lr: 5.000000000000001e-05
          entropy: 0.9796606361865997
          entropy_coeff: 0.009999999999999998
          kl: 0.007358349017965571
          policy_loss: -0.0015403900709417132
          total_loss: 0.0020665133164988626
          vf_explained_var: 0.09942714124917984
          vf_loss: 0.012305481669803461
    num_agent_steps_sampled: 1198000
    num_agent_steps_trained: 1198000
    num_steps_sampled: 1198000
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1198,30989.9,1198000,-2.8264,-2.05,-5.14,282.64


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1199000
  custom_metrics: {}
  date: 2021-11-05_21-09-19
  done: false
  episode_len_mean: 278.43
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.784299999999984
  episode_reward_min: -4.949999999999939
  episodes_this_iter: 4
  episodes_total: 4496
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14922224774347911
          cur_lr: 5.000000000000001e-05
          entropy: 0.9373701069090101
          entropy_coeff: 0.009999999999999998
          kl: 0.006227332987750364
          policy_loss: 0.00483898056877984
          total_loss: 0.008774268958303664
          vf_explained_var: 0.04678795486688614
          vf_loss: 0.012379730006472932
    num_agent_steps_sampled: 1199000
    num_agent_steps_trained: 1199000
    num_steps_sampled: 1199000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1199,31014.9,1199000,-2.7843,-2.05,-4.95,278.43




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1200000
  custom_metrics: {}
  date: 2021-11-05_21-10-02
  done: false
  episode_len_mean: 269.26
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.6925999999999863
  episode_reward_min: -4.4499999999999496
  episodes_this_iter: 4
  episodes_total: 4500
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14922224774347911
          cur_lr: 5.000000000000001e-05
          entropy: 0.973579599459966
          entropy_coeff: 0.009999999999999998
          kl: 0.006552561243471978
          policy_loss: 0.014921888874636757
          total_loss: 0.018481033709314136
          vf_explained_var: 0.04741587117314339
          vf_loss: 0.01231715606732501
    num_agent_steps_sampled: 1200000
    num_agent_steps_trained: 1200000
    num_steps_sampled: 1200000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1200,31057.7,1200000,-2.6926,-2.05,-4.45,269.26


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1201000
  custom_metrics: {}
  date: 2021-11-05_21-10-27
  done: false
  episode_len_mean: 266.12
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.661199999999987
  episode_reward_min: -4.189999999999955
  episodes_this_iter: 4
  episodes_total: 4504
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14922224774347911
          cur_lr: 5.000000000000001e-05
          entropy: 0.9005423638555738
          entropy_coeff: 0.009999999999999998
          kl: 0.01307031651036748
          policy_loss: 0.009743579063150617
          total_loss: 0.016994559350940915
          vf_explained_var: 0.05168713629245758
          vf_loss: 0.014306021212703652
    num_agent_steps_sampled: 1201000
    num_agent_steps_trained: 1201000
    num_steps_sampled: 1201000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1201,31083,1201000,-2.6612,-2.05,-4.19,266.12


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1202000
  custom_metrics: {}
  date: 2021-11-05_21-10-52
  done: false
  episode_len_mean: 262.14
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.621399999999988
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 4508
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14922224774347911
          cur_lr: 5.000000000000001e-05
          entropy: 0.9028126875559489
          entropy_coeff: 0.009999999999999998
          kl: 0.018522592966436543
          policy_loss: 0.02459344690044721
          total_loss: 0.03269920891357793
          vf_explained_var: 0.03909338638186455
          vf_loss: 0.014369906360904376
    num_agent_steps_sampled: 1202000
    num_agent_steps_trained: 1202000
    num_steps_sampled: 1202000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1202,31107.7,1202000,-2.6214,-2.05,-3.5,262.14


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1203000
  custom_metrics: {}
  date: 2021-11-05_21-11-16
  done: false
  episode_len_mean: 260.81
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.608099999999988
  episode_reward_min: -3.0299999999999794
  episodes_this_iter: 3
  episodes_total: 4511
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14922224774347911
          cur_lr: 5.000000000000001e-05
          entropy: 0.846629700395796
          entropy_coeff: 0.009999999999999998
          kl: 0.011404547879105486
          policy_loss: -0.1097869293557273
          total_loss: -0.10378966050015556
          vf_explained_var: 0.12734131515026093
          vf_loss: 0.01276175681915548
    num_agent_steps_sampled: 1203000
    num_agent_steps_trained: 1203000
    num_steps_sampled: 1203000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1203,31132.2,1203000,-2.6081,-2.05,-3.03,260.81


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1204000
  custom_metrics: {}
  date: 2021-11-05_21-11-40
  done: false
  episode_len_mean: 260.9
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.6089999999999884
  episode_reward_min: -3.0299999999999794
  episodes_this_iter: 4
  episodes_total: 4515
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14922224774347911
          cur_lr: 5.000000000000001e-05
          entropy: 0.7134385950035519
          entropy_coeff: 0.009999999999999998
          kl: 0.004955148067965353
          policy_loss: -0.04058214815126525
          total_loss: -0.03578330667482482
          vf_explained_var: 0.11976141482591629
          vf_loss: 0.011193806450400088
    num_agent_steps_sampled: 1204000
    num_agent_steps_trained: 1204000
    num_steps_sampled: 1204000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1204,31155.9,1204000,-2.609,-2.05,-3.03,260.9


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1205000
  custom_metrics: {}
  date: 2021-11-05_21-12-04
  done: false
  episode_len_mean: 261.01
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.6100999999999885
  episode_reward_min: -3.0299999999999794
  episodes_this_iter: 4
  episodes_total: 4519
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07461112387173956
          cur_lr: 5.000000000000001e-05
          entropy: 0.8527589281400044
          entropy_coeff: 0.009999999999999998
          kl: 0.008807519239348633
          policy_loss: -0.001860350618759791
          total_loss: 0.0010279438147942225
          vf_explained_var: 0.13654468953609467
          vf_loss: 0.010758741826025976
    num_agent_steps_sampled: 1205000
    num_agent_steps_trained: 1205000
    num_steps_sampled: 1205000
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1205,31179.7,1205000,-2.6101,-2.05,-3.03,261.01


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1206000
  custom_metrics: {}
  date: 2021-11-05_21-12-27
  done: false
  episode_len_mean: 260.38
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.6037999999999886
  episode_reward_min: -2.9299999999999815
  episodes_this_iter: 4
  episodes_total: 4523
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07461112387173956
          cur_lr: 5.000000000000001e-05
          entropy: 0.7455920914808909
          entropy_coeff: 0.009999999999999998
          kl: 0.010162798250390716
          policy_loss: 0.022709657500187556
          total_loss: 0.026736808030141723
          vf_explained_var: 0.11248090863227844
          vf_loss: 0.010724812497695286
    num_agent_steps_sampled: 1206000
    num_agent_steps_trained: 1206000
    num_steps_sampled: 1206000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1206,31203.1,1206000,-2.6038,-2.05,-2.93,260.38


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1207000
  custom_metrics: {}
  date: 2021-11-05_21-12-51
  done: false
  episode_len_mean: 260.46
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.6045999999999885
  episode_reward_min: -2.9299999999999815
  episodes_this_iter: 3
  episodes_total: 4526
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07461112387173956
          cur_lr: 5.000000000000001e-05
          entropy: 0.6726240671343273
          entropy_coeff: 0.009999999999999998
          kl: 0.01432653754570623
          policy_loss: -0.08743318468332291
          total_loss: -0.08406928545898862
          vf_explained_var: 0.05447371304035187
          vf_loss: 0.009021220343290932
    num_agent_steps_sampled: 1207000
    num_agent_steps_trained: 1207000
    num_steps_sampled: 1207000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1207,31226.7,1207000,-2.6046,-2.05,-2.93,260.46




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1208000
  custom_metrics: {}
  date: 2021-11-05_21-13-31
  done: false
  episode_len_mean: 259.86
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.5985999999999887
  episode_reward_min: -2.9299999999999815
  episodes_this_iter: 4
  episodes_total: 4530
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07461112387173956
          cur_lr: 5.000000000000001e-05
          entropy: 0.9759446899096171
          entropy_coeff: 0.009999999999999998
          kl: 0.012425284752428292
          policy_loss: -0.027526613656017516
          total_loss: -0.023264876670307584
          vf_explained_var: 0.03767768293619156
          vf_loss: 0.013094117544177506
    num_agent_steps_sampled: 1208000
    num_agent_steps_trained: 1208000
    num_steps_sampled: 1208000
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1208,31267.1,1208000,-2.5986,-2.05,-2.93,259.86


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1209000
  custom_metrics: {}
  date: 2021-11-05_21-13-57
  done: false
  episode_len_mean: 259.9
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.598999999999988
  episode_reward_min: -2.9299999999999815
  episodes_this_iter: 4
  episodes_total: 4534
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07461112387173956
          cur_lr: 5.000000000000001e-05
          entropy: 0.8528013308842977
          entropy_coeff: 0.009999999999999998
          kl: 0.008409347969350339
          policy_loss: 0.007534891201390161
          total_loss: 0.012472762043277423
          vf_explained_var: 0.09726712852716446
          vf_loss: 0.012838455341342423
    num_agent_steps_sampled: 1209000
    num_agent_steps_trained: 1209000
    num_steps_sampled: 1209000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1209,31292.4,1209000,-2.599,-2.05,-2.93,259.9


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1210000
  custom_metrics: {}
  date: 2021-11-05_21-14-21
  done: false
  episode_len_mean: 259.61
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.5960999999999887
  episode_reward_min: -2.9299999999999815
  episodes_this_iter: 4
  episodes_total: 4538
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07461112387173956
          cur_lr: 5.000000000000001e-05
          entropy: 0.7243014613787333
          entropy_coeff: 0.009999999999999998
          kl: 0.0058776634388957895
          policy_loss: 0.00496676183409161
          total_loss: 0.011258302132288615
          vf_explained_var: 0.14066511392593384
          vf_loss: 0.013096015362275972
    num_agent_steps_sampled: 1210000
    num_agent_steps_trained: 1210000
    num_steps_sampled: 1210000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1210,31316.8,1210000,-2.5961,-2.05,-2.93,259.61


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1211000
  custom_metrics: {}
  date: 2021-11-05_21-14-45
  done: false
  episode_len_mean: 260.03
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.6002999999999883
  episode_reward_min: -2.9299999999999815
  episodes_this_iter: 4
  episodes_total: 4542
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07461112387173956
          cur_lr: 5.000000000000001e-05
          entropy: 0.7920034534401363
          entropy_coeff: 0.009999999999999998
          kl: 0.0035569354021163235
          policy_loss: 0.025911425633562937
          total_loss: 0.031462063143650694
          vf_explained_var: 0.06488528847694397
          vf_loss: 0.013205283207611905
    num_agent_steps_sampled: 1211000
    num_agent_steps_trained: 1211000
    num_steps_sampled: 1211000
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1211,31340.9,1211000,-2.6003,-2.17,-2.93,260.03


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1212000
  custom_metrics: {}
  date: 2021-11-05_21-15-10
  done: false
  episode_len_mean: 260.46
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.6045999999999885
  episode_reward_min: -2.9299999999999815
  episodes_this_iter: 3
  episodes_total: 4545
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03730556193586978
          cur_lr: 5.000000000000001e-05
          entropy: 0.7072029948234558
          entropy_coeff: 0.009999999999999998
          kl: 0.013138202488305398
          policy_loss: -0.11606565192341804
          total_loss: -0.11276035540633732
          vf_explained_var: 0.03370940312743187
          vf_loss: 0.00988720425715049
    num_agent_steps_sampled: 1212000
    num_agent_steps_trained: 1212000
    num_steps_sampled: 1212000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1212,31365.1,1212000,-2.6046,-2.17,-2.93,260.46


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1213000
  custom_metrics: {}
  date: 2021-11-05_21-15-34
  done: false
  episode_len_mean: 260.17
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.601699999999988
  episode_reward_min: -2.9299999999999815
  episodes_this_iter: 4
  episodes_total: 4549
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03730556193586978
          cur_lr: 5.000000000000001e-05
          entropy: 0.3544141408469942
          entropy_coeff: 0.009999999999999998
          kl: 0.011551777415461591
          policy_loss: -0.08928191947440306
          total_loss: -0.07764568395084805
          vf_explained_var: 0.048181045800447464
          vf_loss: 0.014749433534840742
    num_agent_steps_sampled: 1213000
    num_agent_steps_trained: 1213000
    num_steps_sampled: 1213000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1213,31389,1213000,-2.6017,-2.17,-2.93,260.17


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1214000
  custom_metrics: {}
  date: 2021-11-05_21-15-57
  done: false
  episode_len_mean: 260.05
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.6004999999999883
  episode_reward_min: -2.9299999999999815
  episodes_this_iter: 4
  episodes_total: 4553
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03730556193586978
          cur_lr: 5.000000000000001e-05
          entropy: 0.4263712740606732
          entropy_coeff: 0.009999999999999998
          kl: 0.00529394814212969
          policy_loss: 0.014691134749187363
          total_loss: 0.0240537760572301
          vf_explained_var: 0.05949082225561142
          vf_loss: 0.013428861244271199
    num_agent_steps_sampled: 1214000
    num_agent_steps_trained: 1214000
    num_steps_sampled: 1214000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1214,31413,1214000,-2.6005,-2.17,-2.93,260.05


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1215000
  custom_metrics: {}
  date: 2021-11-05_21-16-22
  done: false
  episode_len_mean: 260.22
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.6021999999999883
  episode_reward_min: -2.9299999999999815
  episodes_this_iter: 4
  episodes_total: 4557
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03730556193586978
          cur_lr: 5.000000000000001e-05
          entropy: 0.5611911336580913
          entropy_coeff: 0.009999999999999998
          kl: 0.010730711677950764
          policy_loss: 0.035198317550950584
          total_loss: 0.042867725011375216
          vf_explained_var: 0.1357605755329132
          vf_loss: 0.012881004913813538
    num_agent_steps_sampled: 1215000
    num_agent_steps_trained: 1215000
    num_steps_sampled: 1215000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1215,31437.3,1215000,-2.6022,-2.17,-2.93,260.22




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1216000
  custom_metrics: {}
  date: 2021-11-05_21-17-04
  done: false
  episode_len_mean: 259.61
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.5960999999999887
  episode_reward_min: -2.7899999999999845
  episodes_this_iter: 4
  episodes_total: 4561
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03730556193586978
          cur_lr: 5.000000000000001e-05
          entropy: 0.6085185322496626
          entropy_coeff: 0.009999999999999998
          kl: 0.011450464265538881
          policy_loss: 0.03700792756345537
          total_loss: 0.04731139515837034
          vf_explained_var: 0.13860318064689636
          vf_loss: 0.015961487394654087
    num_agent_steps_sampled: 1216000
    num_agent_steps_trained: 1216000
    num_steps_sampled: 1216000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1216,31479.3,1216000,-2.5961,-2.17,-2.79,259.61


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1217000
  custom_metrics: {}
  date: 2021-11-05_21-17-29
  done: false
  episode_len_mean: 259.28
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.5927999999999884
  episode_reward_min: -2.7899999999999845
  episodes_this_iter: 4
  episodes_total: 4565
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03730556193586978
          cur_lr: 5.000000000000001e-05
          entropy: 0.8180727508332994
          entropy_coeff: 0.009999999999999998
          kl: 0.0205206004211615
          policy_loss: 0.013976934800545375
          total_loss: 0.027516244020726947
          vf_explained_var: 0.019994214177131653
          vf_loss: 0.020954502477414077
    num_agent_steps_sampled: 1217000
    num_agent_steps_trained: 1217000
    num_steps_sampled: 1217000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1217,31504,1217000,-2.5928,-2.17,-2.79,259.28


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1218000
  custom_metrics: {}
  date: 2021-11-05_21-17-52
  done: false
  episode_len_mean: 259.36
  episode_media: {}
  episode_reward_max: -2.1699999999999977
  episode_reward_mean: -2.5935999999999884
  episode_reward_min: -2.909999999999982
  episodes_this_iter: 3
  episodes_total: 4568
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05595834290380467
          cur_lr: 5.000000000000001e-05
          entropy: 0.6402350488636229
          entropy_coeff: 0.009999999999999998
          kl: 0.010095563983617398
          policy_loss: -0.08899262009395494
          total_loss: -0.07954987055725521
          vf_explained_var: 0.2033918797969818
          vf_loss: 0.015280166878882381
    num_agent_steps_sampled: 1218000
    num_agent_steps_trained: 1218000
    num_steps_sampled: 1218000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1218,31527.5,1218000,-2.5936,-2.17,-2.91,259.36


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1219000
  custom_metrics: {}
  date: 2021-11-05_21-18-15
  done: false
  episode_len_mean: 260.68
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.6067999999999887
  episode_reward_min: -2.909999999999982
  episodes_this_iter: 4
  episodes_total: 4572
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05595834290380467
          cur_lr: 5.000000000000001e-05
          entropy: 0.5489927262067795
          entropy_coeff: 0.009999999999999998
          kl: 0.00471038037644765
          policy_loss: 0.029825479288895927
          total_loss: 0.03789497572514746
          vf_explained_var: 0.22060836851596832
          vf_loss: 0.0132958370157414
    num_agent_steps_sampled: 1219000
    num_agent_steps_trained: 1219000
    num_steps_sampled: 1219000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1219,31550.1,1219000,-2.6068,-2.18,-2.91,260.68


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1220000
  custom_metrics: {}
  date: 2021-11-05_21-18-38
  done: false
  episode_len_mean: 261.37
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.613699999999988
  episode_reward_min: -2.909999999999982
  episodes_this_iter: 4
  episodes_total: 4576
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.027979171451902336
          cur_lr: 5.000000000000001e-05
          entropy: 0.6240046077304416
          entropy_coeff: 0.009999999999999998
          kl: 0.020878237352585434
          policy_loss: 0.008948989336689313
          total_loss: 0.015949304650227227
          vf_explained_var: 0.13246482610702515
          vf_loss: 0.012656202125880454
    num_agent_steps_sampled: 1220000
    num_agent_steps_trained: 1220000
    num_steps_sampled: 1220000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1220,31573.6,1220000,-2.6137,-2.18,-2.91,261.37


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1221000
  custom_metrics: {}
  date: 2021-11-05_21-19-01
  done: false
  episode_len_mean: 262.62
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.6261999999999874
  episode_reward_min: -3.2699999999999743
  episodes_this_iter: 3
  episodes_total: 4579
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04196875717785351
          cur_lr: 5.000000000000001e-05
          entropy: 1.0193907890054914
          entropy_coeff: 0.009999999999999998
          kl: 0.0312636304562479
          policy_loss: 0.023275358312659793
          total_loss: 0.025745316098133724
          vf_explained_var: -0.18479755520820618
          vf_loss: 0.011351769563690241
    num_agent_steps_sampled: 1221000
    num_agent_steps_trained: 1221000
    num_steps_sampled: 1221000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1221,31596,1221000,-2.6262,-2.18,-3.27,262.62


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1222000
  custom_metrics: {}
  date: 2021-11-05_21-19-23
  done: false
  episode_len_mean: 263.55
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.635499999999987
  episode_reward_min: -3.2699999999999743
  episodes_this_iter: 4
  episodes_total: 4583
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06295313576678027
          cur_lr: 5.000000000000001e-05
          entropy: 1.0262834224436017
          entropy_coeff: 0.009999999999999998
          kl: 0.010829644850725609
          policy_loss: -0.008016650047567155
          total_loss: -0.001658675322930018
          vf_explained_var: 0.06258001178503036
          vf_loss: 0.015939050095362797
    num_agent_steps_sampled: 1222000
    num_agent_steps_trained: 1222000
    num_steps_sampled: 1222000
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1222,31618.4,1222000,-2.6355,-2.18,-3.27,263.55


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1223000
  custom_metrics: {}
  date: 2021-11-05_21-19-46
  done: false
  episode_len_mean: 263.89
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.638899999999987
  episode_reward_min: -3.2699999999999743
  episodes_this_iter: 3
  episodes_total: 4586
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06295313576678027
          cur_lr: 5.000000000000001e-05
          entropy: 0.8373220553000768
          entropy_coeff: 0.009999999999999998
          kl: 0.03397625886825427
          policy_loss: -0.014473229770859082
          total_loss: -0.010287555307149887
          vf_explained_var: 0.23963563144207
          vf_loss: 0.010419981393756137
    num_agent_steps_sampled: 1223000
    num_agent_steps_trained: 1223000
    num_steps_sampled: 1223000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1223,31641.6,1223000,-2.6389,-2.18,-3.27,263.89




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1224000
  custom_metrics: {}
  date: 2021-11-05_21-20-26
  done: false
  episode_len_mean: 264.4
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.6439999999999872
  episode_reward_min: -3.2699999999999743
  episodes_this_iter: 4
  episodes_total: 4590
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09442970365017042
          cur_lr: 5.000000000000001e-05
          entropy: 0.7879164576530456
          entropy_coeff: 0.009999999999999998
          kl: 0.01662406134749964
          policy_loss: -0.01454648267891672
          total_loss: -0.006737295703755484
          vf_explained_var: 0.2828080356121063
          vf_loss: 0.014118547592726018
    num_agent_steps_sampled: 1224000
    num_agent_steps_trained: 1224000
    num_steps_sampled: 1224000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1224,31681.1,1224000,-2.644,-2.18,-3.27,264.4


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1225000
  custom_metrics: {}
  date: 2021-11-05_21-20-51
  done: false
  episode_len_mean: 265.24
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.652399999999987
  episode_reward_min: -3.2699999999999743
  episodes_this_iter: 3
  episodes_total: 4593
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09442970365017042
          cur_lr: 5.000000000000001e-05
          entropy: 0.5293253226412667
          entropy_coeff: 0.009999999999999998
          kl: 0.012444146096539772
          policy_loss: -0.10416305826769935
          total_loss: -0.09528347245521016
          vf_explained_var: 0.18898069858551025
          vf_loss: 0.012997743528750208
    num_agent_steps_sampled: 1225000
    num_agent_steps_trained: 1225000
    num_steps_sampled: 1225000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1225,31706.2,1225000,-2.6524,-2.18,-3.27,265.24


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1226000
  custom_metrics: {}
  date: 2021-11-05_21-21-14
  done: false
  episode_len_mean: 266.27
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.662699999999987
  episode_reward_min: -3.2699999999999743
  episodes_this_iter: 4
  episodes_total: 4597
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09442970365017042
          cur_lr: 5.000000000000001e-05
          entropy: 0.6337499936421712
          entropy_coeff: 0.009999999999999998
          kl: 0.0069836059198663415
          policy_loss: 0.029458321382602057
          total_loss: 0.035447626726494895
          vf_explained_var: 0.253417044878006
          vf_loss: 0.011667344589821167
    num_agent_steps_sampled: 1226000
    num_agent_steps_trained: 1226000
    num_steps_sampled: 1226000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1226,31728.9,1226000,-2.6627,-2.18,-3.27,266.27


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1227000
  custom_metrics: {}
  date: 2021-11-05_21-21-41
  done: false
  episode_len_mean: 267.71
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.6770999999999874
  episode_reward_min: -3.2699999999999743
  episodes_this_iter: 4
  episodes_total: 4601
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09442970365017042
          cur_lr: 5.000000000000001e-05
          entropy: 0.6288950092262692
          entropy_coeff: 0.009999999999999998
          kl: 0.007877520147940468
          policy_loss: 0.043678574429617985
          total_loss: 0.05161717703772916
          vf_explained_var: 0.08660785108804703
          vf_loss: 0.013483682336906591
    num_agent_steps_sampled: 1227000
    num_agent_steps_trained: 1227000
    num_steps_sampled: 1227000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1227,31755.9,1227000,-2.6771,-2.29,-3.27,267.71


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1228000
  custom_metrics: {}
  date: 2021-11-05_21-22-04
  done: false
  episode_len_mean: 268.42
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.6841999999999873
  episode_reward_min: -3.2699999999999743
  episodes_this_iter: 3
  episodes_total: 4604
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09442970365017042
          cur_lr: 5.000000000000001e-05
          entropy: 0.41890624380773966
          entropy_coeff: 0.009999999999999998
          kl: 0.005558050409361847
          policy_loss: 0.053380572547515236
          total_loss: 0.057157124082247414
          vf_explained_var: 0.19434045255184174
          vf_loss: 0.007440768353020152
    num_agent_steps_sampled: 1228000
    num_agent_steps_trained: 1228000
    num_steps_sampled: 1228000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1228,31779.2,1228000,-2.6842,-2.29,-3.27,268.42


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1229000
  custom_metrics: {}
  date: 2021-11-05_21-22-27
  done: false
  episode_len_mean: 269.66
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.696599999999986
  episode_reward_min: -3.2699999999999743
  episodes_this_iter: 4
  episodes_total: 4608
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09442970365017042
          cur_lr: 5.000000000000001e-05
          entropy: 0.5864017956786686
          entropy_coeff: 0.009999999999999998
          kl: 0.004291739790629221
          policy_loss: 0.027333358095751867
          total_loss: 0.03598770143257247
          vf_explained_var: 0.03891605883836746
          vf_loss: 0.014113093064063125
    num_agent_steps_sampled: 1229000
    num_agent_steps_trained: 1229000
    num_steps_sampled: 1229000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1229,31801.7,1229000,-2.6966,-2.29,-3.27,269.66


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1230000
  custom_metrics: {}
  date: 2021-11-05_21-22-50
  done: false
  episode_len_mean: 270.34
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.7033999999999856
  episode_reward_min: -3.2699999999999743
  episodes_this_iter: 3
  episodes_total: 4611
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04721485182508521
          cur_lr: 5.000000000000001e-05
          entropy: 0.538121286365721
          entropy_coeff: 0.009999999999999998
          kl: 0.012297665322757901
          policy_loss: -0.014787466824054718
          total_loss: -0.009688115285502539
          vf_explained_var: -0.05969591811299324
          vf_loss: 0.00989992997298638
    num_agent_steps_sampled: 1230000
    num_agent_steps_trained: 1230000
    num_steps_sampled: 1230000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1230,31824.7,1230000,-2.7034,-2.29,-3.27,270.34


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1231000
  custom_metrics: {}
  date: 2021-11-05_21-23-13
  done: false
  episode_len_mean: 270.77
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.707699999999986
  episode_reward_min: -3.2699999999999743
  episodes_this_iter: 4
  episodes_total: 4615
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04721485182508521
          cur_lr: 5.000000000000001e-05
          entropy: 0.45934024188253614
          entropy_coeff: 0.009999999999999998
          kl: 0.0235291622169293
          policy_loss: 0.006951730946699778
          total_loss: 0.013956833879152935
          vf_explained_var: 0.19055362045764923
          vf_loss: 0.010487575684156683
    num_agent_steps_sampled: 1231000
    num_agent_steps_trained: 1231000
    num_steps_sampled: 1231000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1231,31848,1231000,-2.7077,-2.29,-3.27,270.77


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1232000
  custom_metrics: {}
  date: 2021-11-05_21-23-37
  done: false
  episode_len_mean: 271.0
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.7099999999999858
  episode_reward_min: -3.2699999999999743
  episodes_this_iter: 4
  episodes_total: 4619
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07082227773762781
          cur_lr: 5.000000000000001e-05
          entropy: 0.36230109549231
          entropy_coeff: 0.009999999999999998
          kl: 0.001993313171000624
          policy_loss: -0.01335233160191112
          total_loss: -0.006564049753877851
          vf_explained_var: 0.22018080949783325
          vf_loss: 0.010270118433982133
    num_agent_steps_sampled: 1232000
    num_agent_steps_trained: 1232000
    num_steps_sampled: 1232000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1232,31871.4,1232000,-2.71,-2.29,-3.27,271




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1233000
  custom_metrics: {}
  date: 2021-11-05_21-24-16
  done: false
  episode_len_mean: 271.2
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.7119999999999864
  episode_reward_min: -3.2699999999999743
  episodes_this_iter: 3
  episodes_total: 4622
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.035411138868813904
          cur_lr: 5.000000000000001e-05
          entropy: 0.39496954944398666
          entropy_coeff: 0.009999999999999998
          kl: 0.003990155791804442
          policy_loss: -0.04651712750395139
          total_loss: -0.04279052855239974
          vf_explained_var: 0.3196754455566406
          vf_loss: 0.007534999527140624
    num_agent_steps_sampled: 1233000
    num_agent_steps_trained: 1233000
    num_steps_sampled: 1233000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1233,31911.1,1233000,-2.712,-2.29,-3.27,271.2


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1234000
  custom_metrics: {}
  date: 2021-11-05_21-24-40
  done: false
  episode_len_mean: 271.13
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.7112999999999854
  episode_reward_min: -3.2699999999999743
  episodes_this_iter: 4
  episodes_total: 4626
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.017705569434406952
          cur_lr: 5.000000000000001e-05
          entropy: 0.632289954688814
          entropy_coeff: 0.009999999999999998
          kl: 0.1612576776015364
          policy_loss: 0.03249142948124144
          total_loss: 0.03987540784809324
          vf_explained_var: 0.4674389660358429
          vf_loss: 0.010851719815077054
    num_agent_steps_sampled: 1234000
    num_agent_steps_trained: 1234000
    num_steps_sampled: 1234000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1234,31934.4,1234000,-2.7113,-2.29,-3.27,271.13


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1235000
  custom_metrics: {}
  date: 2021-11-05_21-25-03
  done: false
  episode_len_mean: 271.59
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.7158999999999867
  episode_reward_min: -3.2699999999999743
  episodes_this_iter: 4
  episodes_total: 4630
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02655835415161042
          cur_lr: 5.000000000000001e-05
          entropy: 0.22839798248476453
          entropy_coeff: 0.009999999999999998
          kl: 0.009027935904528937
          policy_loss: 0.007272157900863224
          total_loss: 0.012370592438512379
          vf_explained_var: 0.44647496938705444
          vf_loss: 0.007142647511015335
    num_agent_steps_sampled: 1235000
    num_agent_steps_trained: 1235000
    num_steps_sampled: 1235000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1235,31957.7,1235000,-2.7159,-2.29,-3.27,271.59


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1236000
  custom_metrics: {}
  date: 2021-11-05_21-25-26
  done: false
  episode_len_mean: 271.71
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.717099999999986
  episode_reward_min: -3.2699999999999743
  episodes_this_iter: 3
  episodes_total: 4633
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02655835415161042
          cur_lr: 5.000000000000001e-05
          entropy: 0.342304500275188
          entropy_coeff: 0.009999999999999998
          kl: 0.009246935980756632
          policy_loss: -0.1151709158387449
          total_loss: -0.11140837180945608
          vf_explained_var: 0.3109581470489502
          vf_loss: 0.0069400037845803635
    num_agent_steps_sampled: 1236000
    num_agent_steps_trained: 1236000
    num_steps_sampled: 1236000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1236,31980.5,1236000,-2.7171,-2.29,-3.27,271.71


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1237000
  custom_metrics: {}
  date: 2021-11-05_21-25-48
  done: false
  episode_len_mean: 272.66
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.7265999999999866
  episode_reward_min: -3.2699999999999743
  episodes_this_iter: 4
  episodes_total: 4637
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02655835415161042
          cur_lr: 5.000000000000001e-05
          entropy: 0.598585773507754
          entropy_coeff: 0.009999999999999998
          kl: 0.029499300635795735
          policy_loss: 0.029519102970759072
          total_loss: 0.03436057902872562
          vf_explained_var: 0.1570759117603302
          vf_loss: 0.010043880705618196
    num_agent_steps_sampled: 1237000
    num_agent_steps_trained: 1237000
    num_steps_sampled: 1237000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1237,32003.1,1237000,-2.7266,-2.29,-3.27,272.66


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1238000
  custom_metrics: {}
  date: 2021-11-05_21-26-10
  done: false
  episode_len_mean: 273.69
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.7368999999999857
  episode_reward_min: -3.2699999999999743
  episodes_this_iter: 3
  episodes_total: 4640
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03983753122741563
          cur_lr: 5.000000000000001e-05
          entropy: 0.499781894352701
          entropy_coeff: 0.009999999999999998
          kl: 0.0034421476150424406
          policy_loss: 0.059912896321879495
          total_loss: 0.06207834713988834
          vf_explained_var: -0.1874227523803711
          vf_loss: 0.007026146770092763
    num_agent_steps_sampled: 1238000
    num_agent_steps_trained: 1238000
    num_steps_sampled: 1238000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1238,32025.1,1238000,-2.7369,-2.29,-3.27,273.69


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1239000
  custom_metrics: {}
  date: 2021-11-05_21-26-31
  done: false
  episode_len_mean: 275.4
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.753999999999985
  episode_reward_min: -3.5399999999999685
  episodes_this_iter: 3
  episodes_total: 4643
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.019918765613707815
          cur_lr: 5.000000000000001e-05
          entropy: 0.6876690089702606
          entropy_coeff: 0.009999999999999998
          kl: 0.01329128788378837
          policy_loss: -0.0948410359521707
          total_loss: -0.08994645319051213
          vf_explained_var: -0.0005696276784874499
          vf_loss: 0.01150652767262525
    num_agent_steps_sampled: 1239000
    num_agent_steps_trained: 1239000
    num_steps_sampled: 1239000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1239,32045.5,1239000,-2.754,-2.29,-3.54,275.4


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1240000
  custom_metrics: {}
  date: 2021-11-05_21-26-52
  done: false
  episode_len_mean: 276.8
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.7679999999999847
  episode_reward_min: -3.5399999999999685
  episodes_this_iter: 4
  episodes_total: 4647
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.019918765613707815
          cur_lr: 5.000000000000001e-05
          entropy: 0.8126457697815366
          entropy_coeff: 0.009999999999999998
          kl: 0.005209249552648664
          policy_loss: 0.010531906369659635
          total_loss: 0.01356504402226872
          vf_explained_var: 0.1215730682015419
          vf_loss: 0.011055832645959324
    num_agent_steps_sampled: 1240000
    num_agent_steps_trained: 1240000
    num_steps_sampled: 1240000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1240,32067.1,1240000,-2.768,-2.29,-3.54,276.8




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1241000
  custom_metrics: {}
  date: 2021-11-05_21-27-29
  done: false
  episode_len_mean: 277.51
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.7750999999999846
  episode_reward_min: -3.5399999999999685
  episodes_this_iter: 3
  episodes_total: 4650
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.019918765613707815
          cur_lr: 5.000000000000001e-05
          entropy: 0.8042851077185736
          entropy_coeff: 0.009999999999999998
          kl: 0.016861226566466787
          policy_loss: 0.013075721595022414
          total_loss: 0.013801929023530748
          vf_explained_var: -0.19998350739479065
          vf_loss: 0.008433202385074563
    num_agent_steps_sampled: 1241000
    num_agent_steps_trained: 1241000
    num_steps_sampled: 1241000
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1241,32104,1241000,-2.7751,-2.29,-3.54,277.51


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1242000
  custom_metrics: {}
  date: 2021-11-05_21-27-51
  done: false
  episode_len_mean: 280.4
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.803999999999984
  episode_reward_min: -4.579999999999947
  episodes_this_iter: 3
  episodes_total: 4653
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.019918765613707815
          cur_lr: 5.000000000000001e-05
          entropy: 0.8860673838191562
          entropy_coeff: 0.009999999999999998
          kl: 0.012600442223860853
          policy_loss: 0.03630287034644021
          total_loss: 0.036195094221168095
          vf_explained_var: 0.15567466616630554
          vf_loss: 0.008501912359820885
    num_agent_steps_sampled: 1242000
    num_agent_steps_trained: 1242000
    num_steps_sampled: 1242000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1242,32125.5,1242000,-2.804,-2.29,-4.58,280.4


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1243000
  custom_metrics: {}
  date: 2021-11-05_21-28-12
  done: false
  episode_len_mean: 282.07
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.8206999999999844
  episode_reward_min: -4.579999999999947
  episodes_this_iter: 3
  episodes_total: 4656
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.019918765613707815
          cur_lr: 5.000000000000001e-05
          entropy: 0.8897865606678856
          entropy_coeff: 0.009999999999999998
          kl: 0.0742596720645395
          policy_loss: 0.06033493396308687
          total_loss: 0.0598700501024723
          vf_explained_var: 0.39126676321029663
          vf_loss: 0.006953822184328197
    num_agent_steps_sampled: 1243000
    num_agent_steps_trained: 1243000
    num_steps_sampled: 1243000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1243,32146.3,1243000,-2.8207,-2.29,-4.58,282.07


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1244000
  custom_metrics: {}
  date: 2021-11-05_21-28-34
  done: false
  episode_len_mean: 283.03
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -2.8302999999999834
  episode_reward_min: -4.579999999999947
  episodes_this_iter: 3
  episodes_total: 4659
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.029878148420561728
          cur_lr: 5.000000000000001e-05
          entropy: 0.8932839837339189
          entropy_coeff: 0.009999999999999998
          kl: 0.00993400545329474
          policy_loss: -0.058011762259735
          total_loss: -0.054559295583102435
          vf_explained_var: 0.3415341377258301
          vf_loss: 0.012088496413909727
    num_agent_steps_sampled: 1244000
    num_agent_steps_trained: 1244000
    num_steps_sampled: 1244000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1244,32168.7,1244000,-2.8303,-2.29,-4.58,283.03


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1245000
  custom_metrics: {}
  date: 2021-11-05_21-28-56
  done: false
  episode_len_mean: 285.03
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.8502999999999834
  episode_reward_min: -4.579999999999947
  episodes_this_iter: 4
  episodes_total: 4663
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.029878148420561728
          cur_lr: 5.000000000000001e-05
          entropy: 0.829270910554462
          entropy_coeff: 0.009999999999999998
          kl: 0.02172666965969058
          policy_loss: 0.04966268067558607
          total_loss: 0.05783935578333007
          vf_explained_var: -0.0026921762619167566
          vf_loss: 0.015820228505051798
    num_agent_steps_sampled: 1245000
    num_agent_steps_trained: 1245000
    num_steps_sampled: 1245000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1245,32190.4,1245000,-2.8503,-2.4,-4.58,285.03


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1246000
  custom_metrics: {}
  date: 2021-11-05_21-29-18
  done: false
  episode_len_mean: 286.32
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.863199999999983
  episode_reward_min: -4.579999999999947
  episodes_this_iter: 3
  episodes_total: 4666
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04481722263084259
          cur_lr: 5.000000000000001e-05
          entropy: 1.1492987950642903
          entropy_coeff: 0.009999999999999998
          kl: 0.020914987778660157
          policy_loss: -0.0067925442424085405
          total_loss: -0.007939570231570138
          vf_explained_var: 0.16999301314353943
          vf_loss: 0.009408612927008007
    num_agent_steps_sampled: 1246000
    num_agent_steps_trained: 1246000
    num_steps_sampled: 1246000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1246,32212.6,1246000,-2.8632,-2.4,-4.58,286.32


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1247000
  custom_metrics: {}
  date: 2021-11-05_21-29-40
  done: false
  episode_len_mean: 286.92
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.8691999999999833
  episode_reward_min: -4.579999999999947
  episodes_this_iter: 3
  episodes_total: 4669
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0672258339462639
          cur_lr: 5.000000000000001e-05
          entropy: 0.9315388467576768
          entropy_coeff: 0.009999999999999998
          kl: 0.01738202446615828
          policy_loss: -0.06867566257715225
          total_loss: -0.06366129252645704
          vf_explained_var: 0.22342701256275177
          vf_loss: 0.013161234252361789
    num_agent_steps_sampled: 1247000
    num_agent_steps_trained: 1247000
    num_steps_sampled: 1247000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1247,32234.1,1247000,-2.8692,-2.4,-4.58,286.92


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1248000
  custom_metrics: {}
  date: 2021-11-05_21-30-00
  done: false
  episode_len_mean: 288.29
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.8828999999999834
  episode_reward_min: -4.579999999999947
  episodes_this_iter: 4
  episodes_total: 4673
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0672258339462639
          cur_lr: 5.000000000000001e-05
          entropy: 1.1667500972747802
          entropy_coeff: 0.009999999999999998
          kl: 0.05031161211408267
          policy_loss: 0.04203732427623537
          total_loss: 0.04676037952303887
          vf_explained_var: 0.45667800307273865
          vf_loss: 0.013008315426607927
    num_agent_steps_sampled: 1248000
    num_agent_steps_trained: 1248000
    num_steps_sampled: 1248000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1248,32254.5,1248000,-2.8829,-2.4,-4.58,288.29


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1249000
  custom_metrics: {}
  date: 2021-11-05_21-30-21
  done: false
  episode_len_mean: 289.49
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.894899999999981
  episode_reward_min: -4.579999999999947
  episodes_this_iter: 3
  episodes_total: 4676
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10083875091939583
          cur_lr: 5.000000000000001e-05
          entropy: 1.1931991179784138
          entropy_coeff: 0.009999999999999998
          kl: 0.009957237056835943
          policy_loss: 0.05469674368699392
          total_loss: 0.05597204466660818
          vf_explained_var: 0.19479456543922424
          vf_loss: 0.012203216649747143
    num_agent_steps_sampled: 1249000
    num_agent_steps_trained: 1249000
    num_steps_sampled: 1249000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1249,32275.6,1249000,-2.8949,-2.4,-4.58,289.49


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1250000
  custom_metrics: {}
  date: 2021-11-05_21-30-42
  done: false
  episode_len_mean: 290.33
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.903299999999982
  episode_reward_min: -4.579999999999947
  episodes_this_iter: 3
  episodes_total: 4679
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10083875091939583
          cur_lr: 5.000000000000001e-05
          entropy: 1.1983844916025796
          entropy_coeff: 0.009999999999999998
          kl: 0.008415605982754985
          policy_loss: 0.04813642965422736
          total_loss: 0.052054944137732186
          vf_explained_var: -0.12707197666168213
          vf_loss: 0.015053739372847808
    num_agent_steps_sampled: 1250000
    num_agent_steps_trained: 1250000
    num_steps_sampled: 1250000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1250,32296.6,1250000,-2.9033,-2.4,-4.58,290.33




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1251000
  custom_metrics: {}
  date: 2021-11-05_21-31-19
  done: false
  episode_len_mean: 292.0
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.9199999999999817
  episode_reward_min: -4.579999999999947
  episodes_this_iter: 3
  episodes_total: 4682
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10083875091939583
          cur_lr: 5.000000000000001e-05
          entropy: 1.1358331541220348
          entropy_coeff: 0.009999999999999998
          kl: 0.02370121334747005
          policy_loss: 0.044705605341328516
          total_loss: 0.04649397217565113
          vf_explained_var: -0.1533343642950058
          vf_loss: 0.010756699482832725
    num_agent_steps_sampled: 1251000
    num_agent_steps_trained: 1251000
    num_steps_sampled: 1251000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1251,32332.9,1251000,-2.92,-2.4,-4.58,292


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1252000
  custom_metrics: {}
  date: 2021-11-05_21-31-39
  done: false
  episode_len_mean: 293.44
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.934399999999982
  episode_reward_min: -4.579999999999947
  episodes_this_iter: 3
  episodes_total: 4685
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15125812637909375
          cur_lr: 5.000000000000001e-05
          entropy: 1.171823995643192
          entropy_coeff: 0.009999999999999998
          kl: 0.016040823366186254
          policy_loss: 0.05024024711714851
          total_loss: 0.05120722966061698
          vf_explained_var: -0.13424330949783325
          vf_loss: 0.01025891968036174
    num_agent_steps_sampled: 1252000
    num_agent_steps_trained: 1252000
    num_steps_sampled: 1252000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1252,32353.2,1252000,-2.9344,-2.4,-4.58,293.44


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1253000
  custom_metrics: {}
  date: 2021-11-05_21-31-59
  done: false
  episode_len_mean: 294.95
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.9494999999999805
  episode_reward_min: -4.579999999999947
  episodes_this_iter: 3
  episodes_total: 4688
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15125812637909375
          cur_lr: 5.000000000000001e-05
          entropy: 1.1601202911800808
          entropy_coeff: 0.009999999999999998
          kl: 0.01582823545136702
          policy_loss: 0.05265726662344403
          total_loss: 0.05476623624563217
          vf_explained_var: 0.36665114760398865
          vf_loss: 0.011316024957017766
    num_agent_steps_sampled: 1253000
    num_agent_steps_trained: 1253000
    num_steps_sampled: 1253000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1253,32373.2,1253000,-2.9495,-2.4,-4.58,294.95


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1254000
  custom_metrics: {}
  date: 2021-11-05_21-32-18
  done: false
  episode_len_mean: 297.45
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.9744999999999813
  episode_reward_min: -4.579999999999947
  episodes_this_iter: 3
  episodes_total: 4691
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15125812637909375
          cur_lr: 5.000000000000001e-05
          entropy: 1.2729175514645046
          entropy_coeff: 0.009999999999999998
          kl: 0.015964078961975915
          policy_loss: 0.07226533649696244
          total_loss: 0.07056086874670453
          vf_explained_var: 0.37774771451950073
          vf_loss: 0.008610006899430624
    num_agent_steps_sampled: 1254000
    num_agent_steps_trained: 1254000
    num_steps_sampled: 1254000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1254,32392.1,1254000,-2.9745,-2.4,-4.58,297.45


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1255000
  custom_metrics: {}
  date: 2021-11-05_21-32-40
  done: false
  episode_len_mean: 298.12
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.9811999999999803
  episode_reward_min: -4.579999999999947
  episodes_this_iter: 3
  episodes_total: 4694
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15125812637909375
          cur_lr: 5.000000000000001e-05
          entropy: 1.0400030785136753
          entropy_coeff: 0.009999999999999998
          kl: 0.00431817845850841
          policy_loss: 0.042442407541804845
          total_loss: 0.04299115240573883
          vf_explained_var: 0.34541594982147217
          vf_loss: 0.010295613630053898
    num_agent_steps_sampled: 1255000
    num_agent_steps_trained: 1255000
    num_steps_sampled: 1255000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1255,32414,1255000,-2.9812,-2.4,-4.58,298.12


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1256000
  custom_metrics: {}
  date: 2021-11-05_21-32-59
  done: false
  episode_len_mean: 299.0
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.9899999999999802
  episode_reward_min: -4.579999999999947
  episodes_this_iter: 3
  episodes_total: 4697
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07562906318954687
          cur_lr: 5.000000000000001e-05
          entropy: 0.9497890088293287
          entropy_coeff: 0.009999999999999998
          kl: 0.019674106070621634
          policy_loss: -0.15657418908344375
          total_loss: -0.15154671238528358
          vf_explained_var: 0.5130099654197693
          vf_loss: 0.013037432906114393
    num_agent_steps_sampled: 1256000
    num_agent_steps_trained: 1256000
    num_steps_sampled: 1256000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1256,32433.4,1256000,-2.99,-2.4,-4.58,299


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1257000
  custom_metrics: {}
  date: 2021-11-05_21-33-12
  done: false
  episode_len_mean: 304.52
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.0451999999999786
  episode_reward_min: -5.949999999999918
  episodes_this_iter: 2
  episodes_total: 4699
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07562906318954687
          cur_lr: 5.000000000000001e-05
          entropy: 1.4705903238720364
          entropy_coeff: 0.009999999999999998
          kl: 0.024128464389719877
          policy_loss: 0.08088294408387608
          total_loss: 0.07632402355472247
          vf_explained_var: 0.5229519009590149
          vf_loss: 0.008322169692514257
    num_agent_steps_sampled: 1257000
    num_agent_steps_trained: 1257000
    num_steps_sampled: 1257000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1257,32446.6,1257000,-3.0452,-2.4,-5.95,304.52


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1258000
  custom_metrics: {}
  date: 2021-11-05_21-33-28
  done: false
  episode_len_mean: 307.78
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.077799999999978
  episode_reward_min: -5.949999999999918
  episodes_this_iter: 2
  episodes_total: 4701
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11344359478432027
          cur_lr: 5.000000000000001e-05
          entropy: 1.3578727854622734
          entropy_coeff: 0.009999999999999998
          kl: 0.020721352476243106
          policy_loss: -0.08374098191658656
          total_loss: -0.08546720312701331
          vf_explained_var: 0.3550944924354553
          vf_loss: 0.00950180273115014
    num_agent_steps_sampled: 1258000
    num_agent_steps_trained: 1258000
    num_steps_sampled: 1258000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1258,32462.1,1258000,-3.0778,-2.4,-5.95,307.78


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1259000
  custom_metrics: {}
  date: 2021-11-05_21-33-44
  done: false
  episode_len_mean: 312.52
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.125199999999978
  episode_reward_min: -5.949999999999918
  episodes_this_iter: 2
  episodes_total: 4703
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1701653921764805
          cur_lr: 5.000000000000001e-05
          entropy: 1.5226025289959377
          entropy_coeff: 0.009999999999999998
          kl: 0.013210471648581034
          policy_loss: -0.08022762884696324
          total_loss: -0.08353381637069913
          vf_explained_var: 0.17304684221744537
          vf_loss: 0.009671871576251255
    num_agent_steps_sampled: 1259000
    num_agent_steps_trained: 1259000
    num_steps_sampled: 1259000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1259,32478.2,1259000,-3.1252,-2.4,-5.95,312.52


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1260000
  custom_metrics: {}
  date: 2021-11-05_21-34-02
  done: false
  episode_len_mean: 314.51
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.1450999999999767
  episode_reward_min: -5.949999999999918
  episodes_this_iter: 3
  episodes_total: 4706
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1701653921764805
          cur_lr: 5.000000000000001e-05
          entropy: 1.2432039585378436
          entropy_coeff: 0.009999999999999998
          kl: 0.013424782605171891
          policy_loss: 0.006014244258403778
          total_loss: 0.004397062626149919
          vf_explained_var: 0.20440919697284698
          vf_loss: 0.00853042445378378
    num_agent_steps_sampled: 1260000
    num_agent_steps_trained: 1260000
    num_steps_sampled: 1260000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1260,32496.2,1260000,-3.1451,-2.4,-5.95,314.51


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1261000
  custom_metrics: {}
  date: 2021-11-05_21-34-19
  done: false
  episode_len_mean: 318.13
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.1812999999999754
  episode_reward_min: -5.949999999999918
  episodes_this_iter: 2
  episodes_total: 4708
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1701653921764805
          cur_lr: 5.000000000000001e-05
          entropy: 1.6549390766355727
          entropy_coeff: 0.009999999999999998
          kl: 0.014468723229548234
          policy_loss: -0.06507721758551067
          total_loss: -0.06733598411083222
          vf_explained_var: 0.011324634775519371
          vf_loss: 0.011828551358646817
    num_agent_steps_sampled: 1261000
    num_agent_steps_trained: 1261000
    num_steps_sampled: 1261000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1261,32513.2,1261000,-3.1813,-2.4,-5.95,318.13




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1262000
  custom_metrics: {}
  date: 2021-11-05_21-34-55
  done: false
  episode_len_mean: 320.28
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.2027999999999754
  episode_reward_min: -5.949999999999918
  episodes_this_iter: 3
  episodes_total: 4711
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1701653921764805
          cur_lr: 5.000000000000001e-05
          entropy: 1.6720470786094666
          entropy_coeff: 0.009999999999999998
          kl: 0.01630502991890539
          policy_loss: 0.00999637527598275
          total_loss: 0.006467704143789079
          vf_explained_var: -0.19387081265449524
          vf_loss: 0.010417246744166025
    num_agent_steps_sampled: 1262000
    num_agent_steps_trained: 1262000
    num_steps_sampled: 1262000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1262,32548.6,1262000,-3.2028,-2.4,-5.95,320.28


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1263000
  custom_metrics: {}
  date: 2021-11-05_21-35-10
  done: false
  episode_len_mean: 323.21
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.2320999999999747
  episode_reward_min: -5.949999999999918
  episodes_this_iter: 2
  episodes_total: 4713
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1701653921764805
          cur_lr: 5.000000000000001e-05
          entropy: 1.3855380084779527
          entropy_coeff: 0.009999999999999998
          kl: 0.010889382215253898
          policy_loss: -0.06280008422003852
          total_loss: -0.06553148097462125
          vf_explained_var: -0.005194099619984627
          vf_loss: 0.009270988016699752
    num_agent_steps_sampled: 1263000
    num_agent_steps_trained: 1263000
    num_steps_sampled: 1263000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1263,32563.9,1263000,-3.2321,-2.4,-5.95,323.21


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1264000
  custom_metrics: {}
  date: 2021-11-05_21-35-31
  done: false
  episode_len_mean: 325.35
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.2534999999999745
  episode_reward_min: -5.949999999999918
  episodes_this_iter: 4
  episodes_total: 4717
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1701653921764805
          cur_lr: 5.000000000000001e-05
          entropy: 0.8893012053436703
          entropy_coeff: 0.009999999999999998
          kl: 0.006932911159580928
          policy_loss: -0.05492210288842519
          total_loss: -0.053453880962398316
          vf_explained_var: 0.48950424790382385
          vf_loss: 0.009181489459135467
    num_agent_steps_sampled: 1264000
    num_agent_steps_trained: 1264000
    num_steps_sampled: 1264000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1264,32585.2,1264000,-3.2535,-2.4,-5.95,325.35


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1265000
  custom_metrics: {}
  date: 2021-11-05_21-35-46
  done: false
  episode_len_mean: 329.47
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.294699999999974
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 2
  episodes_total: 4719
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1701653921764805
          cur_lr: 5.000000000000001e-05
          entropy: 1.2011585238907072
          entropy_coeff: 0.009999999999999998
          kl: 0.015141815078089019
          policy_loss: 0.0481584327088462
          total_loss: 0.04394532408979204
          vf_explained_var: 0.5786814093589783
          vf_loss: 0.005221865895307726
    num_agent_steps_sampled: 1265000
    num_agent_steps_trained: 1265000
    num_steps_sampled: 1265000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1265,32600,1265000,-3.2947,-2.4,-6.66,329.47


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1266000
  custom_metrics: {}
  date: 2021-11-05_21-36-03
  done: false
  episode_len_mean: 333.11
  episode_media: {}
  episode_reward_max: -2.669999999999987
  episode_reward_mean: -3.3310999999999735
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 2
  episodes_total: 4721
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1701653921764805
          cur_lr: 5.000000000000001e-05
          entropy: 1.4133888827429877
          entropy_coeff: 0.009999999999999998
          kl: 0.026072153043725798
          policy_loss: 0.018223568714327284
          total_loss: 0.013854223572545581
          vf_explained_var: 0.49305883049964905
          vf_loss: 0.0053279675135854635
    num_agent_steps_sampled: 1266000
    num_agent_steps_trained: 1266000
    num_steps_sampled: 1266000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1266,32616.8,1266000,-3.3311,-2.67,-6.66,333.11


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1267000
  custom_metrics: {}
  date: 2021-11-05_21-36-22
  done: false
  episode_len_mean: 333.29
  episode_media: {}
  episode_reward_max: -2.669999999999987
  episode_reward_mean: -3.3328999999999733
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 3
  episodes_total: 4724
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2552480882647208
          cur_lr: 5.000000000000001e-05
          entropy: 0.6077040791511535
          entropy_coeff: 0.009999999999999998
          kl: 0.004100611699738168
          policy_loss: -0.08790031770865123
          total_loss: -0.08354559623532826
          vf_explained_var: 0.48978284001350403
          vf_loss: 0.009385089772856898
    num_agent_steps_sampled: 1267000
    num_agent_steps_trained: 1267000
    num_steps_sampled: 1267000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1267,32636.4,1267000,-3.3329,-2.67,-6.66,333.29


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1268000
  custom_metrics: {}
  date: 2021-11-05_21-36-42
  done: false
  episode_len_mean: 336.25
  episode_media: {}
  episode_reward_max: -2.669999999999987
  episode_reward_mean: -3.3624999999999714
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 3
  episodes_total: 4727
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1276240441323604
          cur_lr: 5.000000000000001e-05
          entropy: 0.8200852619277106
          entropy_coeff: 0.009999999999999998
          kl: 0.01050879855002083
          policy_loss: -0.0922209448284573
          total_loss: -0.08931741317113241
          vf_explained_var: 0.5262771248817444
          vf_loss: 0.009763208277420037
    num_agent_steps_sampled: 1268000
    num_agent_steps_trained: 1268000
    num_steps_sampled: 1268000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1268,32655.6,1268000,-3.3625,-2.67,-6.66,336.25


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1269000
  custom_metrics: {}
  date: 2021-11-05_21-36-59
  done: false
  episode_len_mean: 340.57
  episode_media: {}
  episode_reward_max: -2.669999999999987
  episode_reward_mean: -3.405699999999971
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 2
  episodes_total: 4729
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1276240441323604
          cur_lr: 5.000000000000001e-05
          entropy: 1.5763582931624518
          entropy_coeff: 0.009999999999999998
          kl: 0.015093837151483823
          policy_loss: -0.1118017390370369
          total_loss: -0.11515428572893142
          vf_explained_var: 0.25595057010650635
          vf_loss: 0.010484695237957769
    num_agent_steps_sampled: 1269000
    num_agent_steps_trained: 1269000
    num_steps_sampled: 1269000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1269,32672.6,1269000,-3.4057,-2.67,-6.66,340.57


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1270000
  custom_metrics: {}
  date: 2021-11-05_21-37-22
  done: false
  episode_len_mean: 340.72
  episode_media: {}
  episode_reward_max: -2.669999999999987
  episode_reward_mean: -3.407199999999971
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 4
  episodes_total: 4733
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1276240441323604
          cur_lr: 5.000000000000001e-05
          entropy: 0.5428534540865156
          entropy_coeff: 0.009999999999999998
          kl: 0.0072246885456575035
          policy_loss: 0.02917001959350374
          total_loss: 0.029912562916676204
          vf_explained_var: 0.7445672154426575
          vf_loss: 0.005249034608196881
    num_agent_steps_sampled: 1270000
    num_agent_steps_trained: 1270000
    num_steps_sampled: 1270000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1270,32695.8,1270000,-3.4072,-2.67,-6.66,340.72


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1271000
  custom_metrics: {}
  date: 2021-11-05_21-37-45
  done: false
  episode_len_mean: 340.21
  episode_media: {}
  episode_reward_max: -2.669999999999987
  episode_reward_mean: -3.4020999999999715
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 4
  episodes_total: 4737
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1276240441323604
          cur_lr: 5.000000000000001e-05
          entropy: 0.43378854625754887
          entropy_coeff: 0.009999999999999998
          kl: 0.004055499508952209
          policy_loss: -0.05835437931948238
          total_loss: -0.051315370657377776
          vf_explained_var: 0.32989081740379333
          vf_loss: 0.010859315045591858
    num_agent_steps_sampled: 1271000
    num_agent_steps_trained: 1271000
    num_steps_sampled: 1271000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1271,32719.1,1271000,-3.4021,-2.67,-6.66,340.21




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1272000
  custom_metrics: {}
  date: 2021-11-05_21-38-23
  done: false
  episode_len_mean: 339.22
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.3921999999999723
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 3
  episodes_total: 4740
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0638120220661802
          cur_lr: 5.000000000000001e-05
          entropy: 0.4001559019088745
          entropy_coeff: 0.009999999999999998
          kl: 0.004694855677002686
          policy_loss: -0.08722315803170204
          total_loss: -0.08503208955128988
          vf_explained_var: 0.7242554426193237
          vf_loss: 0.005893038616826137
    num_agent_steps_sampled: 1272000
    num_agent_steps_trained: 1272000
    num_steps_sampled: 1272000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1272,32757.3,1272000,-3.3922,-2.38,-6.66,339.22


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1273000
  custom_metrics: {}
  date: 2021-11-05_21-38-44
  done: false
  episode_len_mean: 340.98
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.4097999999999717
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 3
  episodes_total: 4743
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0319060110330901
          cur_lr: 5.000000000000001e-05
          entropy: 0.9256932493713167
          entropy_coeff: 0.009999999999999998
          kl: 0.017428739820695684
          policy_loss: -0.0004380186398824056
          total_loss: -2.0302500989702014e-05
          vf_explained_var: 0.025931840762495995
          vf_loss: 0.009118570719793852
    num_agent_steps_sampled: 1273000
    num_agent_steps_trained: 1273000
    num_steps_sampled: 1273000
    num_steps

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1273,32777.4,1273000,-3.4098,-2.38,-6.66,340.98


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1274000
  custom_metrics: {}
  date: 2021-11-05_21-38-58
  done: false
  episode_len_mean: 344.27
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.4426999999999714
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 2
  episodes_total: 4745
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0319060110330901
          cur_lr: 5.000000000000001e-05
          entropy: 1.369982420735889
          entropy_coeff: 0.009999999999999998
          kl: 0.03582139538884732
          policy_loss: -0.04908708673384454
          total_loss: -0.05191257492535644
          vf_explained_var: -0.17452631890773773
          vf_loss: 0.00973141844280892
    num_agent_steps_sampled: 1274000
    num_agent_steps_trained: 1274000
    num_steps_sampled: 1274000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1274,32791.6,1274000,-3.4427,-2.38,-6.66,344.27


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1275000
  custom_metrics: {}
  date: 2021-11-05_21-39-17
  done: false
  episode_len_mean: 345.98
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.4597999999999702
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 3
  episodes_total: 4748
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04785901654963513
          cur_lr: 5.000000000000001e-05
          entropy: 1.391515056292216
          entropy_coeff: 0.009999999999999998
          kl: 0.030769018454688494
          policy_loss: 0.057459590832392375
          total_loss: 0.052133379379908244
          vf_explained_var: 0.5325982570648193
          vf_loss: 0.007116361575511595
    num_agent_steps_sampled: 1275000
    num_agent_steps_trained: 1275000
    num_steps_sampled: 1275000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1275,32810.9,1275000,-3.4598,-2.38,-6.66,345.98


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1276000
  custom_metrics: {}
  date: 2021-11-05_21-39-38
  done: false
  episode_len_mean: 344.26
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.442599999999971
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 3
  episodes_total: 4751
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07178852482445272
          cur_lr: 5.000000000000001e-05
          entropy: 1.2481485724449157
          entropy_coeff: 0.009999999999999998
          kl: 0.007957587359618717
          policy_loss: 0.01879589152004984
          total_loss: 0.014635294924179714
          vf_explained_var: 0.5644395351409912
          vf_loss: 0.007749624979785747
    num_agent_steps_sampled: 1276000
    num_agent_steps_trained: 1276000
    num_steps_sampled: 1276000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1276,32831.4,1276000,-3.4426,-2.38,-6.66,344.26


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1277000
  custom_metrics: {}
  date: 2021-11-05_21-39-59
  done: false
  episode_len_mean: 344.92
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.4491999999999705
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 3
  episodes_total: 4754
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07178852482445272
          cur_lr: 5.000000000000001e-05
          entropy: 1.2600203858481513
          entropy_coeff: 0.009999999999999998
          kl: 0.01496682094279183
          policy_loss: -0.14980155378580093
          total_loss: -0.14819518013132943
          vf_explained_var: 0.31232473254203796
          vf_loss: 0.013132131213529243
    num_agent_steps_sampled: 1277000
    num_agent_steps_trained: 1277000
    num_steps_sampled: 1277000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1277,32852.8,1277000,-3.4492,-2.38,-6.66,344.92


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1278000
  custom_metrics: {}
  date: 2021-11-05_21-40-20
  done: false
  episode_len_mean: 345.47
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.45469999999997
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 4
  episodes_total: 4758
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07178852482445272
          cur_lr: 5.000000000000001e-05
          entropy: 1.3309954722722372
          entropy_coeff: 0.009999999999999998
          kl: 0.014923686567641701
          policy_loss: 0.058619989910059504
          total_loss: 0.05520933986537986
          vf_explained_var: 0.5384632349014282
          vf_loss: 0.008827954763546586
    num_agent_steps_sampled: 1278000
    num_agent_steps_trained: 1278000
    num_steps_sampled: 1278000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1278,32873.3,1278000,-3.4547,-2.38,-6.66,345.47


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1279000
  custom_metrics: {}
  date: 2021-11-05_21-40-37
  done: false
  episode_len_mean: 348.05
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.48049999999997
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 2
  episodes_total: 4760
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07178852482445272
          cur_lr: 5.000000000000001e-05
          entropy: 1.6106615861256917
          entropy_coeff: 0.009999999999999998
          kl: 0.013103326823621942
          policy_loss: 0.09547018424297372
          total_loss: 0.08334590907519063
          vf_explained_var: 0.4377099275588989
          vf_loss: 0.00304166938262319
    num_agent_steps_sampled: 1279000
    num_agent_steps_trained: 1279000
    num_steps_sampled: 1279000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1279,32890.6,1279000,-3.4805,-2.38,-6.66,348.05


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1280000
  custom_metrics: {}
  date: 2021-11-05_21-40-56
  done: false
  episode_len_mean: 348.7
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.486999999999969
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 3
  episodes_total: 4763
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07178852482445272
          cur_lr: 5.000000000000001e-05
          entropy: 1.5781121227476331
          entropy_coeff: 0.009999999999999998
          kl: 0.043680179017383754
          policy_loss: 0.007055322660340203
          total_loss: 0.005143448048167758
          vf_explained_var: 0.5395622849464417
          vf_loss: 0.010733512507027223
    num_agent_steps_sampled: 1280000
    num_agent_steps_trained: 1280000
    num_steps_sampled: 1280000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1280,32910.1,1280000,-3.487,-2.38,-6.66,348.7


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1281000
  custom_metrics: {}
  date: 2021-11-05_21-41-16
  done: false
  episode_len_mean: 349.69
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.4968999999999695
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 3
  episodes_total: 4766
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10768278723667905
          cur_lr: 5.000000000000001e-05
          entropy: 1.5907946242226494
          entropy_coeff: 0.009999999999999998
          kl: 0.01605601544117863
          policy_loss: -0.047377347118324704
          total_loss: -0.0512382921245363
          vf_explained_var: 0.3429487645626068
          vf_loss: 0.01031804564408958
    num_agent_steps_sampled: 1281000
    num_agent_steps_trained: 1281000
    num_steps_sampled: 1281000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1281,32930,1281000,-3.4969,-2.38,-6.66,349.69


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1282000
  custom_metrics: {}
  date: 2021-11-05_21-41-37
  done: false
  episode_len_mean: 350.85
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.508499999999968
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 3
  episodes_total: 4769
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10768278723667905
          cur_lr: 5.000000000000001e-05
          entropy: 1.3910965985722012
          entropy_coeff: 0.009999999999999998
          kl: 0.01404575769712307
          policy_loss: -0.028319161964787377
          total_loss: -0.02862992783387502
          vf_explained_var: -0.04788437485694885
          vf_loss: 0.012087710407407333
    num_agent_steps_sampled: 1282000
    num_agent_steps_trained: 1282000
    num_steps_sampled: 1282000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1282,32950.7,1282000,-3.5085,-2.38,-6.66,350.85




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1283000
  custom_metrics: {}
  date: 2021-11-05_21-42-17
  done: false
  episode_len_mean: 349.74
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.497399999999969
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 4
  episodes_total: 4773
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10768278723667905
          cur_lr: 5.000000000000001e-05
          entropy: 1.4139234476619296
          entropy_coeff: 0.009999999999999998
          kl: 0.014777240444118671
          policy_loss: -0.03182331033878856
          total_loss: -0.035459856854544745
          vf_explained_var: 0.4579494595527649
          vf_loss: 0.00891143453773111
    num_agent_steps_sampled: 1283000
    num_agent_steps_trained: 1283000
    num_steps_sampled: 1283000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1283,32990.6,1283000,-3.4974,-2.38,-6.66,349.74


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1284000
  custom_metrics: {}
  date: 2021-11-05_21-42-38
  done: false
  episode_len_mean: 349.53
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.4952999999999697
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 3
  episodes_total: 4776
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10768278723667905
          cur_lr: 5.000000000000001e-05
          entropy: 1.5899286164177788
          entropy_coeff: 0.009999999999999998
          kl: 0.030865098018495758
          policy_loss: 0.010757047434647877
          total_loss: 0.004053729772567749
          vf_explained_var: 0.6819689273834229
          vf_loss: 0.005872326866827078
    num_agent_steps_sampled: 1284000
    num_agent_steps_trained: 1284000
    num_steps_sampled: 1284000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1284,33011.9,1284000,-3.4953,-2.38,-6.66,349.53


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1285000
  custom_metrics: {}
  date: 2021-11-05_21-42-59
  done: false
  episode_len_mean: 349.55
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.4954999999999696
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 3
  episodes_total: 4779
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16152418085501852
          cur_lr: 5.000000000000001e-05
          entropy: 1.4551921592818367
          entropy_coeff: 0.009999999999999998
          kl: 0.01925712495328078
          policy_loss: 0.00848147492441866
          total_loss: 0.00412660435669952
          vf_explained_var: 0.6979100108146667
          vf_loss: 0.0070865573167490465
    num_agent_steps_sampled: 1285000
    num_agent_steps_trained: 1285000
    num_steps_sampled: 1285000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1285,33032.9,1285000,-3.4955,-2.38,-6.66,349.55


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1286000
  custom_metrics: {}
  date: 2021-11-05_21-43-20
  done: false
  episode_len_mean: 348.82
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.4881999999999698
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 3
  episodes_total: 4782
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16152418085501852
          cur_lr: 5.000000000000001e-05
          entropy: 1.5098977208137512
          entropy_coeff: 0.009999999999999998
          kl: 0.0258335969834542
          policy_loss: 0.029510520357224675
          total_loss: 0.02816931688123279
          vf_explained_var: 0.6101700067520142
          vf_loss: 0.00958502579273449
    num_agent_steps_sampled: 1286000
    num_agent_steps_trained: 1286000
    num_steps_sampled: 1286000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1286,33053.1,1286000,-3.4882,-2.38,-6.66,348.82


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1287000
  custom_metrics: {}
  date: 2021-11-05_21-43-37
  done: false
  episode_len_mean: 350.21
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.50209999999997
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 3
  episodes_total: 4785
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24228627128252786
          cur_lr: 5.000000000000001e-05
          entropy: 1.5853577825758192
          entropy_coeff: 0.009999999999999998
          kl: 0.013029799484700986
          policy_loss: 0.033265824388298726
          total_loss: 0.030609064859648545
          vf_explained_var: 0.3820733428001404
          vf_loss: 0.010039876739028841
    num_agent_steps_sampled: 1287000
    num_agent_steps_trained: 1287000
    num_steps_sampled: 1287000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1287,33070.8,1287000,-3.5021,-2.38,-6.66,350.21


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1288000
  custom_metrics: {}
  date: 2021-11-05_21-43-55
  done: false
  episode_len_mean: 351.23
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.5122999999999696
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 2
  episodes_total: 4787
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24228627128252786
          cur_lr: 5.000000000000001e-05
          entropy: 1.4733648432625666
          entropy_coeff: 0.009999999999999998
          kl: 0.019333119279437917
          policy_loss: -0.07282320832212766
          total_loss: -0.0729710739519861
          vf_explained_var: 0.5060082674026489
          vf_loss: 0.009901633492619213
    num_agent_steps_sampled: 1288000
    num_agent_steps_trained: 1288000
    num_steps_sampled: 1288000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1288,33088.5,1288000,-3.5123,-2.38,-6.66,351.23


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1289000
  custom_metrics: {}
  date: 2021-11-05_21-44-13
  done: false
  episode_len_mean: 352.8
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.5279999999999685
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 3
  episodes_total: 4790
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24228627128252786
          cur_lr: 5.000000000000001e-05
          entropy: 1.5491347988446553
          entropy_coeff: 0.009999999999999998
          kl: 0.009992983804414695
          policy_loss: 0.028740009334352282
          total_loss: 0.027071240378750694
          vf_explained_var: 0.5170402526855469
          vf_loss: 0.011401418621729439
    num_agent_steps_sampled: 1289000
    num_agent_steps_trained: 1289000
    num_steps_sampled: 1289000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1289,33105.9,1289000,-3.528,-2.38,-6.66,352.8


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1290000
  custom_metrics: {}
  date: 2021-11-05_21-44-34
  done: false
  episode_len_mean: 352.5
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.524999999999969
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 3
  episodes_total: 4793
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24228627128252786
          cur_lr: 5.000000000000001e-05
          entropy: 1.2627552376853095
          entropy_coeff: 0.009999999999999998
          kl: 0.012788100979451478
          policy_loss: 0.006240772621499168
          total_loss: 0.003992074810796314
          vf_explained_var: 0.5916613936424255
          vf_loss: 0.007280472435781525
    num_agent_steps_sampled: 1290000
    num_agent_steps_trained: 1290000
    num_steps_sampled: 1290000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1290,33127.1,1290000,-3.525,-2.38,-6.66,352.5


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1291000
  custom_metrics: {}
  date: 2021-11-05_21-44-54
  done: false
  episode_len_mean: 353.3
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.532999999999969
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 3
  episodes_total: 4796
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24228627128252786
          cur_lr: 5.000000000000001e-05
          entropy: 1.3045826978153652
          entropy_coeff: 0.009999999999999998
          kl: 0.03798666872719683
          policy_loss: -0.010985382811890708
          total_loss: -0.00817501764330599
          vf_explained_var: 0.4636678397655487
          vf_loss: 0.0066525467697324024
    num_agent_steps_sampled: 1291000
    num_agent_steps_trained: 1291000
    num_steps_sampled: 1291000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1291,33147.5,1291000,-3.533,-2.38,-6.66,353.3


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1292000
  custom_metrics: {}
  date: 2021-11-05_21-45-15
  done: false
  episode_len_mean: 348.61
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.4860999999999693
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 3
  episodes_total: 4799
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3634294069237917
          cur_lr: 5.000000000000001e-05
          entropy: 1.315117151207394
          entropy_coeff: 0.009999999999999998
          kl: 0.007773020882399193
          policy_loss: -0.12718347162008287
          total_loss: -0.12970057626565298
          vf_explained_var: 0.2864299714565277
          vf_loss: 0.007809123314089245
    num_agent_steps_sampled: 1292000
    num_agent_steps_trained: 1292000
    num_steps_sampled: 1292000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1292,33168.3,1292000,-3.4861,-2.38,-6.66,348.61




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1293000
  custom_metrics: {}
  date: 2021-11-05_21-45-53
  done: false
  episode_len_mean: 344.45
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.4444999999999704
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 3
  episodes_total: 4802
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3634294069237917
          cur_lr: 5.000000000000001e-05
          entropy: 1.3862795922491284
          entropy_coeff: 0.009999999999999998
          kl: 0.00895390678946991
          policy_loss: -0.12395551403363546
          total_loss: -0.12581394240260124
          vf_explained_var: 0.3740628659725189
          vf_loss: 0.008750254029615058
    num_agent_steps_sampled: 1293000
    num_agent_steps_trained: 1293000
    num_steps_sampled: 1293000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1293,33206.3,1293000,-3.4445,-2.38,-6.66,344.45


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1294000
  custom_metrics: {}
  date: 2021-11-05_21-46-14
  done: false
  episode_len_mean: 341.6
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.415999999999971
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 4
  episodes_total: 4806
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3634294069237917
          cur_lr: 5.000000000000001e-05
          entropy: 1.1801109433174133
          entropy_coeff: 0.009999999999999998
          kl: 0.0061344576703075086
          policy_loss: -0.03344935807916853
          total_loss: -0.033243354161580405
          vf_explained_var: 0.23436833918094635
          vf_loss: 0.009777671854115195
    num_agent_steps_sampled: 1294000
    num_agent_steps_trained: 1294000
    num_steps_sampled: 1294000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1294,33227.1,1294000,-3.416,-2.38,-6.66,341.6


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1295000
  custom_metrics: {}
  date: 2021-11-05_21-46-35
  done: false
  episode_len_mean: 338.61
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.386099999999971
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 3
  episodes_total: 4809
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3634294069237917
          cur_lr: 5.000000000000001e-05
          entropy: 1.3498892347017923
          entropy_coeff: 0.009999999999999998
          kl: 0.007733386397876214
          policy_loss: 0.03307105071014828
          total_loss: 0.030318783554765912
          vf_explained_var: 0.2583656311035156
          vf_loss: 0.00793608440952893
    num_agent_steps_sampled: 1295000
    num_agent_steps_trained: 1295000
    num_steps_sampled: 1295000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1295,33248.2,1295000,-3.3861,-2.38,-6.66,338.61


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1296000
  custom_metrics: {}
  date: 2021-11-05_21-46-59
  done: false
  episode_len_mean: 333.22
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.3321999999999723
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 4
  episodes_total: 4813
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3634294069237917
          cur_lr: 5.000000000000001e-05
          entropy: 1.1530071377754212
          entropy_coeff: 0.009999999999999998
          kl: 0.0051404811356529
          policy_loss: -0.00585916323794259
          total_loss: -0.003470411151647568
          vf_explained_var: 0.031204218044877052
          vf_loss: 0.012050621304661035
    num_agent_steps_sampled: 1296000
    num_agent_steps_trained: 1296000
    num_steps_sampled: 1296000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1296,33272,1296000,-3.3322,-2.38,-6.66,333.22


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1297000
  custom_metrics: {}
  date: 2021-11-05_21-47-21
  done: false
  episode_len_mean: 332.13
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.3212999999999733
  episode_reward_min: -6.659999999999902
  episodes_this_iter: 3
  episodes_total: 4816
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3634294069237917
          cur_lr: 5.000000000000001e-05
          entropy: 1.2849170406659445
          entropy_coeff: 0.009999999999999998
          kl: 0.007169671498676343
          policy_loss: 0.08077607784006331
          total_loss: 0.07446623891592026
          vf_explained_var: 0.26425033807754517
          vf_loss: 0.00393366446498678
    num_agent_steps_sampled: 1297000
    num_agent_steps_trained: 1297000
    num_steps_sampled: 1297000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1297,33293.7,1297000,-3.3213,-2.38,-6.66,332.13


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1298000
  custom_metrics: {}
  date: 2021-11-05_21-47-41
  done: false
  episode_len_mean: 329.79
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.2978999999999736
  episode_reward_min: -6.339999999999909
  episodes_this_iter: 3
  episodes_total: 4819
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3634294069237917
          cur_lr: 5.000000000000001e-05
          entropy: 1.573415047592587
          entropy_coeff: 0.009999999999999998
          kl: 0.012085969570006228
          policy_loss: -0.023615347345670064
          total_loss: -0.025553872270716562
          vf_explained_var: 0.39361390471458435
          vf_loss: 0.009403226763889607
    num_agent_steps_sampled: 1298000
    num_agent_steps_trained: 1298000
    num_steps_sampled: 1298000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1298,33313.8,1298000,-3.2979,-2.38,-6.34,329.79


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1299000
  custom_metrics: {}
  date: 2021-11-05_21-48-01
  done: false
  episode_len_mean: 327.49
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.274899999999974
  episode_reward_min: -6.339999999999909
  episodes_this_iter: 3
  episodes_total: 4822
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3634294069237917
          cur_lr: 5.000000000000001e-05
          entropy: 1.4810495919651456
          entropy_coeff: 0.009999999999999998
          kl: 0.009202077937678806
          policy_loss: 0.04121545967128542
          total_loss: 0.0374987350569831
          vf_explained_var: 0.490919291973114
          vf_loss: 0.007749462725930951
    num_agent_steps_sampled: 1299000
    num_agent_steps_trained: 1299000
    num_steps_sampled: 1299000
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1299,33334.4,1299000,-3.2749,-2.38,-6.34,327.49


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1300000
  custom_metrics: {}
  date: 2021-11-05_21-48-24
  done: false
  episode_len_mean: 325.51
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.2550999999999743
  episode_reward_min: -6.339999999999909
  episodes_this_iter: 3
  episodes_total: 4825
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3634294069237917
          cur_lr: 5.000000000000001e-05
          entropy: 1.3186664720376333
          entropy_coeff: 0.009999999999999998
          kl: 0.010923266350119813
          policy_loss: -0.10033544020520316
          total_loss: -0.1018474163280593
          vf_explained_var: 0.6626536250114441
          vf_loss: 0.007704851031303406
    num_agent_steps_sampled: 1300000
    num_agent_steps_trained: 1300000
    num_steps_sampled: 1300000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1300,33356.9,1300000,-3.2551,-2.38,-6.34,325.51


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1301000
  custom_metrics: {}
  date: 2021-11-05_21-48-46
  done: false
  episode_len_mean: 321.61
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.2160999999999746
  episode_reward_min: -6.339999999999909
  episodes_this_iter: 4
  episodes_total: 4829
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3634294069237917
          cur_lr: 5.000000000000001e-05
          entropy: 1.3277568565474616
          entropy_coeff: 0.009999999999999998
          kl: 0.006767663418430006
          policy_loss: 0.018524019751283857
          total_loss: 0.015066325747304493
          vf_explained_var: 0.7287585735321045
          vf_loss: 0.007360306087260445
    num_agent_steps_sampled: 1301000
    num_agent_steps_trained: 1301000
    num_steps_sampled: 1301000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1301,33379.1,1301000,-3.2161,-2.38,-6.34,321.61




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1302000
  custom_metrics: {}
  date: 2021-11-05_21-49-27
  done: false
  episode_len_mean: 321.67
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.216699999999975
  episode_reward_min: -6.339999999999909
  episodes_this_iter: 3
  episodes_total: 4832
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3634294069237917
          cur_lr: 5.000000000000001e-05
          entropy: 1.239285581641727
          entropy_coeff: 0.009999999999999998
          kl: 0.009026663094040464
          policy_loss: -0.04587149024009705
          total_loss: -0.048848615255620745
          vf_explained_var: 0.8323214054107666
          vf_loss: 0.006135176952617864
    num_agent_steps_sampled: 1302000
    num_agent_steps_trained: 1302000
    num_steps_sampled: 1302000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1302,33419.7,1302000,-3.2167,-2.38,-6.34,321.67


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1303000
  custom_metrics: {}
  date: 2021-11-05_21-49-48
  done: false
  episode_len_mean: 323.01
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.230099999999975
  episode_reward_min: -6.339999999999909
  episodes_this_iter: 4
  episodes_total: 4836
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3634294069237917
          cur_lr: 5.000000000000001e-05
          entropy: 1.3951753113004897
          entropy_coeff: 0.009999999999999998
          kl: 0.010259574407771312
          policy_loss: -0.07307199421856138
          total_loss: -0.07556051843696171
          vf_explained_var: 0.782918393611908
          vf_loss: 0.007734591984707448
    num_agent_steps_sampled: 1303000
    num_agent_steps_trained: 1303000
    num_steps_sampled: 1303000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1303,33441.3,1303000,-3.2301,-2.38,-6.34,323.01


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1304000
  custom_metrics: {}
  date: 2021-11-05_21-50-12
  done: false
  episode_len_mean: 323.04
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.2303999999999746
  episode_reward_min: -6.339999999999909
  episodes_this_iter: 3
  episodes_total: 4839
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3634294069237917
          cur_lr: 5.000000000000001e-05
          entropy: 1.255540543132358
          entropy_coeff: 0.009999999999999998
          kl: 0.0055072128614617735
          policy_loss: -0.02331144097778532
          total_loss: -0.027212570276525287
          vf_explained_var: 0.783671498298645
          vf_loss: 0.006652792457801601
    num_agent_steps_sampled: 1304000
    num_agent_steps_trained: 1304000
    num_steps_sampled: 1304000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1304,33464.6,1304000,-3.2304,-2.38,-6.34,323.04


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1305000
  custom_metrics: {}
  date: 2021-11-05_21-50-34
  done: false
  episode_len_mean: 320.74
  episode_media: {}
  episode_reward_max: -2.4599999999999915
  episode_reward_mean: -3.207399999999976
  episode_reward_min: -6.339999999999909
  episodes_this_iter: 4
  episodes_total: 4843
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3634294069237917
          cur_lr: 5.000000000000001e-05
          entropy: 1.1868703020943536
          entropy_coeff: 0.009999999999999998
          kl: 0.007997073672000077
          policy_loss: -0.08625174264113109
          total_loss: -0.08588029758797752
          vf_explained_var: 0.7275184392929077
          vf_loss: 0.00933377315911154
    num_agent_steps_sampled: 1305000
    num_agent_steps_trained: 1305000
    num_steps_sampled: 1305000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1305,33486.6,1305000,-3.2074,-2.46,-6.34,320.74


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1306000
  custom_metrics: {}
  date: 2021-11-05_21-50-55
  done: false
  episode_len_mean: 317.09
  episode_media: {}
  episode_reward_max: -2.4599999999999915
  episode_reward_mean: -3.170899999999976
  episode_reward_min: -4.42999999999995
  episodes_this_iter: 3
  episodes_total: 4846
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3634294069237917
          cur_lr: 5.000000000000001e-05
          entropy: 1.315137349234687
          entropy_coeff: 0.009999999999999998
          kl: 0.008472809264961571
          policy_loss: 0.13350781525174776
          total_loss: 0.13349012633164722
          vf_explained_var: 0.4991035759449005
          vf_loss: 0.01005441639572382
    num_agent_steps_sampled: 1306000
    num_agent_steps_trained: 1306000
    num_steps_sampled: 1306000
    num_steps_trained: 130

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1306,33507.7,1306000,-3.1709,-2.46,-4.43,317.09


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1307000
  custom_metrics: {}
  date: 2021-11-05_21-51-18
  done: false
  episode_len_mean: 315.39
  episode_media: {}
  episode_reward_max: -2.4599999999999915
  episode_reward_mean: -3.1538999999999766
  episode_reward_min: -4.42999999999995
  episodes_this_iter: 4
  episodes_total: 4850
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3634294069237917
          cur_lr: 5.000000000000001e-05
          entropy: 1.0579210850927565
          entropy_coeff: 0.009999999999999998
          kl: 0.007606642817199866
          policy_loss: -0.057867412351899676
          total_loss: -0.057999197724792693
          vf_explained_var: 0.6422083377838135
          vf_loss: 0.007682944205589593
    num_agent_steps_sampled: 1307000
    num_agent_steps_trained: 1307000
    num_steps_sampled: 1307000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1307,33530.8,1307000,-3.1539,-2.46,-4.43,315.39


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1308000
  custom_metrics: {}
  date: 2021-11-05_21-51-39
  done: false
  episode_len_mean: 314.04
  episode_media: {}
  episode_reward_max: -2.4599999999999915
  episode_reward_mean: -3.140399999999977
  episode_reward_min: -4.42999999999995
  episodes_this_iter: 3
  episodes_total: 4853
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3634294069237917
          cur_lr: 5.000000000000001e-05
          entropy: 1.2480634596612719
          entropy_coeff: 0.009999999999999998
          kl: 0.00890860303370573
          policy_loss: 0.03020479211376773
          total_loss: 0.027400971493787236
          vf_explained_var: 0.7030320167541504
          vf_loss: 0.006439165455392666
    num_agent_steps_sampled: 1308000
    num_agent_steps_trained: 1308000
    num_steps_sampled: 1308000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1308,33551.9,1308000,-3.1404,-2.46,-4.43,314.04


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1309000
  custom_metrics: {}
  date: 2021-11-05_21-52-01
  done: false
  episode_len_mean: 314.93
  episode_media: {}
  episode_reward_max: -2.4599999999999915
  episode_reward_mean: -3.1492999999999762
  episode_reward_min: -4.42999999999995
  episodes_this_iter: 3
  episodes_total: 4856
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3634294069237917
          cur_lr: 5.000000000000001e-05
          entropy: 1.2843706303172642
          entropy_coeff: 0.009999999999999998
          kl: 0.018944463812777416
          policy_loss: 0.06624364405870438
          total_loss: 0.0668595448964172
          vf_explained_var: 0.6683661341667175
          vf_loss: 0.006574633983998663
    num_agent_steps_sampled: 1309000
    num_agent_steps_trained: 1309000
    num_steps_sampled: 1309000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1309,33573.7,1309000,-3.1493,-2.46,-4.43,314.93




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1310000
  custom_metrics: {}
  date: 2021-11-05_21-52-41
  done: false
  episode_len_mean: 311.67
  episode_media: {}
  episode_reward_max: -2.4599999999999915
  episode_reward_mean: -3.1166999999999776
  episode_reward_min: -4.109999999999957
  episodes_this_iter: 4
  episodes_total: 4860
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3634294069237917
          cur_lr: 5.000000000000001e-05
          entropy: 1.2286252604590522
          entropy_coeff: 0.009999999999999998
          kl: 0.007815440139689756
          policy_loss: 0.035338724280397096
          total_loss: 0.03356635868549347
          vf_explained_var: 0.8220786452293396
          vf_loss: 0.007673525323884355
    num_agent_steps_sampled: 1310000
    num_agent_steps_trained: 1310000
    num_steps_sampled: 1310000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1310,33613.4,1310000,-3.1167,-2.46,-4.11,311.67


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1311000
  custom_metrics: {}
  date: 2021-11-05_21-53-03
  done: false
  episode_len_mean: 311.38
  episode_media: {}
  episode_reward_max: -2.4599999999999915
  episode_reward_mean: -3.1137999999999773
  episode_reward_min: -4.109999999999957
  episodes_this_iter: 3
  episodes_total: 4863
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3634294069237917
          cur_lr: 5.000000000000001e-05
          entropy: 1.3485073407491048
          entropy_coeff: 0.009999999999999998
          kl: 0.01010692418220758
          policy_loss: -0.0644810007678138
          total_loss: -0.06374478389819463
          vf_explained_var: 0.31341665983200073
          vf_loss: 0.010548137407749892
    num_agent_steps_sampled: 1311000
    num_agent_steps_trained: 1311000
    num_steps_sampled: 1311000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1311,33635.5,1311000,-3.1138,-2.46,-4.11,311.38


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1312000
  custom_metrics: {}
  date: 2021-11-05_21-53-25
  done: false
  episode_len_mean: 310.52
  episode_media: {}
  episode_reward_max: -2.4599999999999915
  episode_reward_mean: -3.105199999999977
  episode_reward_min: -4.109999999999957
  episodes_this_iter: 3
  episodes_total: 4866
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3634294069237917
          cur_lr: 5.000000000000001e-05
          entropy: 1.2440643058882819
          entropy_coeff: 0.009999999999999998
          kl: 0.010078405879512529
          policy_loss: 0.030121159553527833
          total_loss: 0.029418693317307367
          vf_explained_var: 0.5237067937850952
          vf_loss: 0.00807538445935481
    num_agent_steps_sampled: 1312000
    num_agent_steps_trained: 1312000
    num_steps_sampled: 1312000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1312,33657.7,1312000,-3.1052,-2.46,-4.11,310.52


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1313000
  custom_metrics: {}
  date: 2021-11-05_21-53-47
  done: false
  episode_len_mean: 309.27
  episode_media: {}
  episode_reward_max: -2.4599999999999915
  episode_reward_mean: -3.092699999999978
  episode_reward_min: -4.109999999999957
  episodes_this_iter: 4
  episodes_total: 4870
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3634294069237917
          cur_lr: 5.000000000000001e-05
          entropy: 1.1956125603781806
          entropy_coeff: 0.009999999999999998
          kl: 0.008546136013503474
          policy_loss: -0.05634108293387625
          total_loss: -0.05258638179964489
          vf_explained_var: 0.3794565498828888
          vf_loss: 0.01260491232905123
    num_agent_steps_sampled: 1313000
    num_agent_steps_trained: 1313000
    num_steps_sampled: 1313000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1313,33679.9,1313000,-3.0927,-2.46,-4.11,309.27


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1314000
  custom_metrics: {}
  date: 2021-11-05_21-54-08
  done: false
  episode_len_mean: 309.75
  episode_media: {}
  episode_reward_max: -2.4599999999999915
  episode_reward_mean: -3.097499999999977
  episode_reward_min: -4.109999999999957
  episodes_this_iter: 3
  episodes_total: 4873
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3634294069237917
          cur_lr: 5.000000000000001e-05
          entropy: 1.2995692239867316
          entropy_coeff: 0.009999999999999998
          kl: 0.008603093714422055
          policy_loss: 0.09507318975196945
          total_loss: 0.09265977591276169
          vf_explained_var: 0.39081573486328125
          vf_loss: 0.007455661373549244
    num_agent_steps_sampled: 1314000
    num_agent_steps_trained: 1314000
    num_steps_sampled: 1314000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1314,33700.4,1314000,-3.0975,-2.46,-4.11,309.75


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1315000
  custom_metrics: {}
  date: 2021-11-05_21-54-28
  done: false
  episode_len_mean: 310.25
  episode_media: {}
  episode_reward_max: -2.4599999999999915
  episode_reward_mean: -3.1024999999999765
  episode_reward_min: -4.109999999999957
  episodes_this_iter: 3
  episodes_total: 4876
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3634294069237917
          cur_lr: 5.000000000000001e-05
          entropy: 1.1459335678153568
          entropy_coeff: 0.009999999999999998
          kl: 0.004217334576371772
          policy_loss: 0.05904478248622683
          total_loss: 0.05601872785223855
          vf_explained_var: 0.5882319211959839
          vf_loss: 0.006900579647885429
    num_agent_steps_sampled: 1315000
    num_agent_steps_trained: 1315000
    num_steps_sampled: 1315000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1315,33720.8,1315000,-3.1025,-2.46,-4.11,310.25


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1316000
  custom_metrics: {}
  date: 2021-11-05_21-54-50
  done: false
  episode_len_mean: 309.31
  episode_media: {}
  episode_reward_max: -2.4599999999999915
  episode_reward_mean: -3.093099999999977
  episode_reward_min: -4.109999999999957
  episodes_this_iter: 3
  episodes_total: 4879
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18171470346189586
          cur_lr: 5.000000000000001e-05
          entropy: 1.2281475292311774
          entropy_coeff: 0.009999999999999998
          kl: 0.009560876002023298
          policy_loss: -0.09207050953474309
          total_loss: -0.09066044183240997
          vf_explained_var: 0.46457794308662415
          vf_loss: 0.011954190209507942
    num_agent_steps_sampled: 1316000
    num_agent_steps_trained: 1316000
    num_steps_sampled: 1316000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1316,33742.4,1316000,-3.0931,-2.46,-4.11,309.31


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1317000
  custom_metrics: {}
  date: 2021-11-05_21-55-12
  done: false
  episode_len_mean: 308.25
  episode_media: {}
  episode_reward_max: -2.4599999999999915
  episode_reward_mean: -3.0824999999999774
  episode_reward_min: -4.109999999999957
  episodes_this_iter: 4
  episodes_total: 4883
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18171470346189586
          cur_lr: 5.000000000000001e-05
          entropy: 1.204481385813819
          entropy_coeff: 0.009999999999999998
          kl: 0.013379203425864716
          policy_loss: -0.029790887898868985
          total_loss: -0.029986385504404703
          vf_explained_var: 0.6534722447395325
          vf_loss: 0.009418117751677831
    num_agent_steps_sampled: 1317000
    num_agent_steps_trained: 1317000
    num_steps_sampled: 1317000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1317,33764.7,1317000,-3.0825,-2.46,-4.11,308.25


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1318000
  custom_metrics: {}
  date: 2021-11-05_21-55-32
  done: false
  episode_len_mean: 306.42
  episode_media: {}
  episode_reward_max: -2.4599999999999915
  episode_reward_mean: -3.0641999999999787
  episode_reward_min: -4.109999999999957
  episodes_this_iter: 3
  episodes_total: 4886
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18171470346189586
          cur_lr: 5.000000000000001e-05
          entropy: 1.2573307222790189
          entropy_coeff: 0.009999999999999998
          kl: 0.010339833526797154
          policy_loss: 0.012429055737124549
          total_loss: 0.008735930422941844
          vf_explained_var: 0.7611212730407715
          vf_loss: 0.007001283201518365
    num_agent_steps_sampled: 1318000
    num_agent_steps_trained: 1318000
    num_steps_sampled: 1318000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1318,33784.9,1318000,-3.0642,-2.46,-4.11,306.42


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1319000
  custom_metrics: {}
  date: 2021-11-05_21-55-56
  done: false
  episode_len_mean: 303.23
  episode_media: {}
  episode_reward_max: -2.4599999999999915
  episode_reward_mean: -3.0322999999999785
  episode_reward_min: -4.0299999999999585
  episodes_this_iter: 3
  episodes_total: 4889
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18171470346189586
          cur_lr: 5.000000000000001e-05
          entropy: 1.3034266100989447
          entropy_coeff: 0.009999999999999998
          kl: 0.01660369035997012
          policy_loss: -0.029001941531896593
          total_loss: -0.031038019371529422
          vf_explained_var: 0.5433884263038635
          vf_loss: 0.00798105261185103
    num_agent_steps_sampled: 1319000
    num_agent_steps_trained: 1319000
    num_steps_sampled: 1319000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1319,33808.1,1319000,-3.0323,-2.46,-4.03,303.23




Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1320000
  custom_metrics: {}
  date: 2021-11-05_21-56-35
  done: false
  episode_len_mean: 301.9
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -3.0189999999999797
  episode_reward_min: -4.0299999999999585
  episodes_this_iter: 4
  episodes_total: 4893
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18171470346189586
          cur_lr: 5.000000000000001e-05
          entropy: 1.15432208776474
          entropy_coeff: 0.009999999999999998
          kl: 0.012454766525111596
          policy_loss: -0.036412015557289124
          total_loss: -0.037012242277463275
          vf_explained_var: 0.7066868543624878
          vf_loss: 0.008679776328305404
    num_agent_steps_sampled: 1320000
    num_agent_steps_trained: 1320000
    num_steps_sampled: 1320000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1320,33847.1,1320000,-3.019,-2.37,-4.03,301.9


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1321000
  custom_metrics: {}
  date: 2021-11-05_21-56-57
  done: false
  episode_len_mean: 300.3
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -3.00299999999998
  episode_reward_min: -4.0299999999999585
  episodes_this_iter: 3
  episodes_total: 4896
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18171470346189586
          cur_lr: 5.000000000000001e-05
          entropy: 1.0831775354014503
          entropy_coeff: 0.009999999999999998
          kl: 0.010121352283082826
          policy_loss: 0.018981050699949265
          total_loss: 0.018476125680738024
          vf_explained_var: 0.5263505578041077
          vf_loss: 0.008487651277230017
    num_agent_steps_sampled: 1321000
    num_agent_steps_trained: 1321000
    num_steps_sampled: 1321000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1321,33869.6,1321000,-3.003,-2.37,-4.03,300.3


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1322000
  custom_metrics: {}
  date: 2021-11-05_21-57-19
  done: false
  episode_len_mean: 300.19
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -3.00189999999998
  episode_reward_min: -4.0299999999999585
  episodes_this_iter: 3
  episodes_total: 4899
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18171470346189586
          cur_lr: 5.000000000000001e-05
          entropy: 0.993696555826399
          entropy_coeff: 0.009999999999999998
          kl: 0.006093254651521147
          policy_loss: -0.07985732166303529
          total_loss: -0.07876977564560042
          vf_explained_var: 0.5749735236167908
          vf_loss: 0.009917275617933936
    num_agent_steps_sampled: 1322000
    num_agent_steps_trained: 1322000
    num_steps_sampled: 1322000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1322,33891.4,1322000,-3.0019,-2.37,-4.03,300.19


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1323000
  custom_metrics: {}
  date: 2021-11-05_21-57-42
  done: false
  episode_len_mean: 297.76
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -2.97759999999998
  episode_reward_min: -3.9999999999999587
  episodes_this_iter: 4
  episodes_total: 4903
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18171470346189586
          cur_lr: 5.000000000000001e-05
          entropy: 1.0091561641958025
          entropy_coeff: 0.009999999999999998
          kl: 0.007129837429685418
          policy_loss: 0.030874348017904493
          total_loss: 0.02986754741933611
          vf_explained_var: 0.7322816848754883
          vf_loss: 0.007789160957973864
    num_agent_steps_sampled: 1323000
    num_agent_steps_trained: 1323000
    num_steps_sampled: 1323000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1323,33913.9,1323000,-2.9776,-2.37,-4,297.76


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1324000
  custom_metrics: {}
  date: 2021-11-05_21-58-05
  done: false
  episode_len_mean: 296.99
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -2.96989999999998
  episode_reward_min: -3.9999999999999587
  episodes_this_iter: 4
  episodes_total: 4907
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18171470346189586
          cur_lr: 5.000000000000001e-05
          entropy: 0.8667469693554772
          entropy_coeff: 0.009999999999999998
          kl: 0.008209378178811575
          policy_loss: -0.033513271684447926
          total_loss: -0.03125533424317837
          vf_explained_var: 0.5963281989097595
          vf_loss: 0.009433642314333055
    num_agent_steps_sampled: 1324000
    num_agent_steps_trained: 1324000
    num_steps_sampled: 1324000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1324,33937.1,1324000,-2.9699,-2.37,-4,296.99


Result for PPO_my_env_549f0_00000:
  agent_timesteps_total: 1325000
  custom_metrics: {}
  date: 2021-11-05_21-58-27
  done: false
  episode_len_mean: 296.87
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -2.9686999999999806
  episode_reward_min: -3.9999999999999587
  episodes_this_iter: 3
  episodes_total: 4910
  experiment_id: 84379fbd193c493d824c19bcb2dbb009
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18171470346189586
          cur_lr: 5.000000000000001e-05
          entropy: 1.0396046969625685
          entropy_coeff: 0.009999999999999998
          kl: 0.00845061312432074
          policy_loss: 0.03101274644335111
          total_loss: 0.03195282303624683
          vf_explained_var: 0.3669999837875366
          vf_loss: 0.009800523648866348
    num_agent_steps_sampled: 1325000
    num_agent_steps_trained: 1325000
    num_steps_sampled: 1325000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_549f0_00000,RUNNING,192.168.3.5:307252,1325,33959.4,1325000,-2.9687,-2.37,-4,296.87
