In [1]:
import torch 
from torch import nn

import ray
from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

#from models import VisualEncoder
from train import *
from wrappers_2 import *



In [2]:
class VisualEncoder(nn.Module):
    """Convolutional encoder that turns an image batch into flat feature vectors.

    The network is six ``Conv2d`` layers (kernel 2, stride 2, no padding), each
    followed by ``ELU``, with channel widths 3 -> 32 -> 32 -> 64 -> 128 -> 256
    -> 512, and a trailing ``Flatten``.
    """

    def __init__(self):
        super().__init__()

        channel_plan = (3, 32, 32, 64, 128, 256, 512)
        stages = []
        for in_ch, out_ch in zip(channel_plan[:-1], channel_plan[1:]):
            stages.append(nn.Conv2d(in_ch, out_ch, kernel_size=2, stride=2, padding=0))
            stages.append(nn.ELU())
        stages.append(nn.Flatten())

        # The position of each layer inside the Sequential determines the
        # state_dict keys (cnn.0.*, cnn.2.*, ...), so the order must not change.
        self.cnn = nn.Sequential(*stages)

    def forward(self, x):
        """Apply the convolutional stack to ``x`` and return the flattened result."""
        return self.cnn(x)

In [3]:
from torch.nn.functional import one_hot

class MyModelClass(TorchModelV2, nn.Module):
    """RLlib Torch model with a pretrained image encoder and a target-grid encoder.

    The observation dict must provide ``'pov'`` (channel-last image batch) and
    ``'target_grid'`` (integer class ids, one-hot encoded here with 7 classes).
    Image features come from ``VisualEncoder`` run under ``torch.no_grad()``,
    so no gradients flow into it. Target features come from a 1x1x1 ``Conv3d``.
    Both are concatenated and passed through an MLP trunk that feeds an action
    head (``action_space.n`` logits) and a scalar value head.

    The ``num_outputs`` argument is only forwarded to ``TorchModelV2``; the
    action head size is taken from ``action_space.n``.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)
        # Size of the flattened VisualEncoder output that the trunk expects.
        visual_features_dim = 512
        # Flattened size of the single-channel target encoding (9 x 11 x 11 grid).
        target_features_dim = 9 * 11 * 11
        self.visual_encoder = VisualEncoder()
        # Weights are always deserialised onto the CPU first; device placement
        # happens afterwards.
        self.visual_encoder.load_state_dict(
            torch.load("/IGLU-Minecraft/models/AngelaCNN/encoder_weigths.pth", map_location=torch.device('cpu'))
        )
        # Collapses the 7 one-hot class channels into a single learned channel.
        self.target_encoder = nn.Sequential(
            nn.Conv3d(7, 1, kernel_size=1, stride=1, padding=0),
            nn.ELU(),
        )
        policy_hidden_dim = 256
        self.policy_network = nn.Sequential(
            nn.Linear(visual_features_dim + target_features_dim, 1024),
            nn.ELU(),
            nn.Linear(1024, 512),
            nn.ELU(),
            nn.Linear(512, policy_hidden_dim),
            nn.ELU(),
            nn.Linear(policy_hidden_dim, policy_hidden_dim),
            nn.ELU(),
        )
        self.action_head = nn.Linear(policy_hidden_dim, action_space.n)
        self.value_head = nn.Linear(policy_hidden_dim, 1)
        # Value estimate cached by the most recent forward() call.
        self.last_value = None

        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            self.visual_encoder.cuda()
            self.target_encoder.cuda()
            self.policy_network.cuda()
            self.action_head.cuda()
            self.value_head.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Compute action logits and cache the value estimate.

        Args:
            input_dict: dict whose ``'obs'`` entry holds ``'pov'`` and
                ``'target_grid'`` tensors.
            state: recurrent state, returned unchanged.
            seq_lens: unused.

        Returns:
            ``(action_logits, state)``.
        """
        obs = input_dict['obs']
        # Tensor.cuda() / Tensor.to() return a new tensor instead of moving in
        # place, so the result has to be kept. Target the device the weights
        # actually live on (the framework may have relocated the model after
        # __init__, e.g. to the CPU on a worker without a GPU).
        device = next(self.parameters()).device
        # Channel-last image -> channel-first float image scaled by 1/255.
        pov = obs['pov'].to(device).permute(0, 3, 1, 2).float() / 255.0
        # One-hot adds a trailing class axis; move it to the channel position.
        target = one_hot(obs['target_grid'].to(device).long(), num_classes=7).permute(0, 4, 1, 2, 3).float()

        # The pretrained encoder is used as a fixed feature extractor.
        with torch.no_grad():
            visual_features = self.visual_encoder(pov)

        target_features = self.target_encoder(target)
        target_features = target_features.reshape(target_features.shape[0], -1)
        features = torch.cat([visual_features, target_features], dim=1)
        features = self.policy_network(features)
        action = self.action_head(features)
        self.last_value = self.value_head(features).squeeze(1)
        return action, state

    @override(TorchModelV2)
    def value_function(self):
        """Return the value estimate cached by the last ``forward()`` call."""
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

In [4]:
# Register MyModelClass under the name "my_torch_model"; the trainer config
# selects it through model["custom_model"].
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [5]:
class VisualObservationWrapper(ObsWrapper):
    """Observation wrapper exposing ``pov``, ``inventory``, ``compass`` and,
    optionally, the task's ``target_grid``.

    Args:
        env: environment to wrap.
        include_target: when true, ``'target_grid'`` is declared in the
            observation space and emitted by ``observation()``. When false it
            is omitted from both, so observations always match the declared
            space.
    """

    def __init__(self, env, include_target=False):
        super().__init__(env)
        # Remembered so observation() emits exactly the keys declared below.
        self.include_target = include_target
        spaces = {
            'pov': gym.spaces.Box(low=0, high=255, shape=(64, 64, 3)),
            'inventory': gym.spaces.Box(low=0.0, high=20.0, shape=(6,)),
            'compass': gym.spaces.Box(low=-180.0, high=180.0, shape=(1,))
        }
        if include_target:
            spaces['target_grid'] = \
                gym.spaces.Box(low=0, high=6, shape=(9, 11, 11))
        self.observation_space = gym.spaces.Dict(spaces)

    def observation(self, obs, reward=None, done=None, info=None):
        """Build the wrapped observation dict from the raw observation.

        The target grid is taken from ``info['target_grid']`` when present (and
        removed from ``info``). Otherwise it falls back to the current task's
        ``target_grid``. If ``info`` was supplied without the key, the situation
        is logged and, when available, ``should_reset(True)`` is called on the
        unwrapped env.
        """
        if info is not None:
            if 'target_grid' in info:
                target_grid = info['target_grid']
                del info['target_grid']
            else:
                logger.error(f'info: {info}')
                if hasattr(self.unwrapped, 'should_reset'):
                    self.unwrapped.should_reset(True)
                target_grid = self.env.unwrapped.tasks.current.target_grid
        else:
            target_grid = self.env.unwrapped.tasks.current.target_grid

        wrapped_obs = {
            'pov': obs['pov'].astype(np.float32),
            'inventory': obs['inventory'],
            'compass': np.array([obs['compass']['angle'].item()]),
        }
        # Only emit the target when it is part of the declared observation space.
        if self.include_target:
            wrapped_obs['target_grid'] = target_grid
        return wrapped_obs

In [6]:
import os
# Order CUDA devices by PCI bus id and expose only device 0 to this process.
# NOTE(review): these variables are presumably only honoured if set before CUDA
# is first initialised in the process — confirm nothing touches CUDA earlier.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0"

class RewardWrapper(gym.RewardWrapper):
    """Reward shaping: penalise zero-reward steps and shrink unit rewards.

    A reward equal to 0 becomes -0.01, a reward whose absolute value equals 1
    is divided by 10, and any other reward is passed through unchanged.
    """

    def __init__(self, env):
        super().__init__(env)

    def reward(self, rew):
        """Return the shaped version of ``rew``."""
        if rew == 0:
            # The substituted penalty can never match the unit-reward rule below.
            return -0.01
        if abs(rew) == 1:
            rew /= 10
        return rew
    
def env_creator(env_config):
    """Build the wrapped IGLU environment used for training.

    ``env_config`` is accepted for the registry callback signature but is not
    read. The base env is created with ``max_steps=250`` and the task preset
    C18/C118/C135, then wrapped in order with the visual observation wrapper
    (target grid included), ``SelectAndPlace``, ``Discretization`` over the
    'human-level' flat action space, and ``RewardWrapper``.
    """
    base_env = gym.make('IGLUSilentBuilder-v0', max_steps=250)
    base_env.update_taskset(TaskSet(preset=["C18", "C118", "C135"]))

    with_observations = VisualObservationWrapper(base_env, include_target=True)
    with_placement = SelectAndPlace(with_observations)
    with_discrete_actions = Discretization(with_placement, flat_action_space('human-level'))
    return RewardWrapper(with_discrete_actions)

from ray.tune.registry import register_env
# Register env_creator under the name "my_env"; the trainer config refers to it
# through its "env" entry.
register_env("my_env", env_creator)

from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

In [6]:
from ray.tune.integration.wandb import WandbLogger

# Model section of the trainer config.
model_settings = {
    # Specify our custom model from above.
    "custom_model": "my_torch_model",
    # Extra kwargs to be passed to your model's c'tor.
    "custom_model_config": {},
}

# Weights & Biases run identification, consumed by WandbLogger.
wandb_settings = {
    "project": "IGLU-Minecraft",
    "name": "PPO MultiTask (C18, C118, C135) pretrained (AngelaCNN) (3 noops after placement) r: -0.01 div10",
}

# PPO trainer configuration.
ppo_config = {
    "env": "my_env",
    "framework": "torch",
    "num_gpus": 1,
    "num_workers": 3,
    "sgd_minibatch_size": 256,
    "clip_param": 0.2,
    "entropy_coeff": 0.01,
    "lambda": 0.95,
    "train_batch_size": 1000,
    #"gamma": 0.99,
    "model": model_settings,
    "logger_config": {"wandb": wandb_settings},
}

# Launch training; a checkpoint is written every 5 iterations and at the end,
# keeping at most 50 of them.
analysis = tune.run(
    PPOTrainer,
    config=ppo_config,
    loggers=[WandbLogger],
    local_dir="/IGLU-Minecraft/checkpoints/3_tasks2",
    keep_checkpoints_num=50,
    checkpoint_freq=5,
    checkpoint_at_end=True,
)



Trial name,status,loc
PPO_my_env_693ef_00000,PENDING,


2021-11-09 03:44:58,953	INFO wandb.py:170 -- Already logged into W&B.
2021-11-09 03:44:59,031	ERROR syncer.py:72 -- Log sync requires rsync to be installed.
[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.6 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2m[36m(pid=170)[0m 2021-11-09 03:45:02,453	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=170)[0m 2021-11-09 03:45:02,453	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=170)[0m 2021-11-09 03:45:11,832	INFO trainable.py:109 -- Trainable.setup took 11.904 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1998
  custom_metrics: {}
  date: 2021-11-09_03-47-34
  done: false
  episode_len_mean: 97.57894736842105
  episode_media: {}
  episode_reward_max: 0.6099999999999995
  episode_reward_mean: -0.909473684210527
  episode_reward_min: -1.2300000000000006
  episodes_this_iter: 19
  episodes_total: 19
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.8806217715853735
          entropy_coeff: 0.009999999999999998
          kl: 0.006584135617869674
          policy_loss: -0.023850587968315396
          total_loss: -0.03545709422656468
          vf_explained_var: -0.20217332243919373
          vf_loss: 0.015882883601478257
    num_agent_steps_sampled: 1998
    num_agent_steps_trained: 1998
    num_steps_sampled: 1998
    num_steps_trained: 1998
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,1,142.822,1998,-0.909474,0.61,-1.23,97.5789


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 3996
  custom_metrics: {}
  date: 2021-11-09_03-48-11
  done: false
  episode_len_mean: 98.74358974358974
  episode_media: {}
  episode_reward_max: 2.700000000000002
  episode_reward_mean: -0.6966666666666671
  episode_reward_min: -1.5300000000000007
  episodes_this_iter: 20
  episodes_total: 39
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.8770361048834663
          entropy_coeff: 0.009999999999999998
          kl: 0.0066759370025840595
          policy_loss: -0.05041158185118721
          total_loss: 0.03938928218114944
          vf_explained_var: -0.38334691524505615
          vf_loss: 0.11723603822645687
    num_agent_steps_sampled: 3996
    num_agent_steps_trained: 3996
    num_steps_sampled: 3996
    num_steps_trained: 3996
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,2,179.315,3996,-0.696667,2.7,-1.53,98.7436


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 5994
  custom_metrics: {}
  date: 2021-11-09_03-48-46
  done: false
  episode_len_mean: 97.55
  episode_media: {}
  episode_reward_max: 2.700000000000002
  episode_reward_mean: -0.7291666666666673
  episode_reward_min: -1.5300000000000007
  episodes_this_iter: 21
  episodes_total: 60
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.861031421025594
          entropy_coeff: 0.009999999999999998
          kl: 0.007369407822213738
          policy_loss: -0.0378443037292787
          total_loss: 0.013340669143058004
          vf_explained_var: -0.006641898769885302
          vf_loss: 0.0783214052917347
    num_agent_steps_sampled: 5994
    num_agent_steps_trained: 5994
    num_steps_sampled: 5994
    num_steps_trained: 5994
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,3,215.048,5994,-0.729167,2.7,-1.53,97.55


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 7992
  custom_metrics: {}
  date: 2021-11-09_03-49-10
  done: false
  episode_len_mean: 98.05
  episode_media: {}
  episode_reward_max: 2.700000000000002
  episode_reward_mean: -0.7448750000000006
  episode_reward_min: -1.5300000000000007
  episodes_this_iter: 20
  episodes_total: 80
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.848040275346665
          entropy_coeff: 0.009999999999999998
          kl: 0.007734790442975093
          policy_loss: -0.028485655607212158
          total_loss: 0.0030356629618576597
          vf_explained_var: 0.30136972665786743
          vf_loss: 0.05845476226171567
    num_agent_steps_sampled: 7992
    num_agent_steps_trained: 7992
    num_steps_sampled: 7992
    num_steps_trained: 7992
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,4,238.567,7992,-0.744875,2.7,-1.53,98.05


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 9990
  custom_metrics: {}
  date: 2021-11-09_03-49-34
  done: false
  episode_len_mean: 97.5
  episode_media: {}
  episode_reward_max: 2.700000000000002
  episode_reward_mean: -0.7832000000000003
  episode_reward_min: -1.6600000000000008
  episodes_this_iter: 22
  episodes_total: 102
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.831816110156831
          entropy_coeff: 0.009999999999999998
          kl: 0.009299715703708309
          policy_loss: -0.038965600977341336
          total_loss: 0.013745234214833804
          vf_explained_var: -0.24511729180812836
          vf_loss: 0.07916905069280239
    num_agent_steps_sampled: 9990
    num_agent_steps_trained: 9990
    num_steps_sampled: 9990
    num_steps_trained: 9990
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,5,262.459,9990,-0.7832,2.7,-1.66,97.5


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 11988
  custom_metrics: {}
  date: 2021-11-09_03-49-57
  done: false
  episode_len_mean: 98.32
  episode_media: {}
  episode_reward_max: 2.700000000000002
  episode_reward_mean: -0.6319000000000002
  episode_reward_min: -1.7300000000000009
  episodes_this_iter: 18
  episodes_total: 120
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.8040067229952133
          entropy_coeff: 0.009999999999999998
          kl: 0.011525091099372225
          policy_loss: -0.0662038639897392
          total_loss: 0.15664281027302857
          vf_explained_var: 0.11742359399795532
          vf_loss: 0.2485817239398048
    num_agent_steps_sampled: 11988
    num_agent_steps_trained: 11988
    num_steps_sampled: 11988
    num_steps_trained: 11988
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,6,285.136,11988,-0.6319,2.7,-1.73,98.32


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 13986
  custom_metrics: {}
  date: 2021-11-09_03-50-20
  done: false
  episode_len_mean: 98.75
  episode_media: {}
  episode_reward_max: 2.880000000000003
  episode_reward_mean: -0.5854
  episode_reward_min: -1.910000000000001
  episodes_this_iter: 20
  episodes_total: 140
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.785818288439796
          entropy_coeff: 0.009999999999999998
          kl: 0.009334025365896418
          policy_loss: 2.627488048303695e-05
          total_loss: 0.1751351712892453
          vf_explained_var: 0.24862638115882874
          vf_loss: 0.2011002762775336
    num_agent_steps_sampled: 13986
    num_agent_steps_trained: 13986
    num_steps_sampled: 13986
    num_steps_trained: 13986
  iterations_since_restore: 7
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,7,308.716,13986,-0.5854,2.88,-1.91,98.75


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 15984
  custom_metrics: {}
  date: 2021-11-09_03-50-44
  done: false
  episode_len_mean: 100.61
  episode_media: {}
  episode_reward_max: 4.250000000000012
  episode_reward_mean: -0.3767999999999997
  episode_reward_min: -1.910000000000001
  episodes_this_iter: 19
  episodes_total: 159
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.7585929757072813
          entropy_coeff: 0.009999999999999998
          kl: 0.01013494827003848
          policy_loss: 0.009691825863860902
          total_loss: 0.2506728465624508
          vf_explained_var: 0.3480892777442932
          vf_loss: 0.26653996494909127
    num_agent_steps_sampled: 15984
    num_agent_steps_trained: 15984
    num_steps_sampled: 15984
    num_steps_trained: 15984
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,8,332.674,15984,-0.3768,4.25,-1.91,100.61


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 17982
  custom_metrics: {}
  date: 2021-11-09_03-51-09
  done: false
  episode_len_mean: 101.39
  episode_media: {}
  episode_reward_max: 4.250000000000012
  episode_reward_mean: -0.27059999999999945
  episode_reward_min: -1.910000000000001
  episodes_this_iter: 20
  episodes_total: 179
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.739664766902015
          entropy_coeff: 0.009999999999999998
          kl: 0.01072021724127623
          policy_loss: -0.06531109037321238
          total_loss: 0.08617791839476142
          vf_explained_var: 0.3902914822101593
          vf_loss: 0.17674161286226342
    num_agent_steps_sampled: 17982
    num_agent_steps_trained: 17982
    num_steps_sampled: 17982
    num_steps_trained: 17982
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,9,357.115,17982,-0.2706,4.25,-1.91,101.39


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 19980
  custom_metrics: {}
  date: 2021-11-09_03-51-32
  done: false
  episode_len_mean: 103.12
  episode_media: {}
  episode_reward_max: 4.250000000000012
  episode_reward_mean: -0.041699999999998905
  episode_reward_min: -2.0599999999999996
  episodes_this_iter: 18
  episodes_total: 197
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.710440783273606
          entropy_coeff: 0.009999999999999998
          kl: 0.010437243967857454
          policy_loss: -0.09070835166743824
          total_loss: 0.10713364991048972
          vf_explained_var: 0.6435344815254211
          vf_loss: 0.22285895812369527
    num_agent_steps_sampled: 19980
    num_agent_steps_trained: 19980
    num_steps_sampled: 19980
    num_steps_trained: 19980
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,10,380.417,19980,-0.0417,4.25,-2.06,103.12


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 21978
  custom_metrics: {}
  date: 2021-11-09_03-51-55
  done: false
  episode_len_mean: 104.35
  episode_media: {}
  episode_reward_max: 4.370000000000015
  episode_reward_mean: 0.08510000000000158
  episode_reward_min: -2.0599999999999996
  episodes_this_iter: 19
  episodes_total: 216
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.690909316426232
          entropy_coeff: 0.009999999999999998
          kl: 0.012168906313704946
          policy_loss: -0.026069822091431844
          total_loss: 0.15874426171538375
          vf_explained_var: 0.3870446979999542
          vf_loss: 0.20928939314825193
    num_agent_steps_sampled: 21978
    num_agent_steps_trained: 21978
    num_steps_sampled: 21978
    num_steps_trained: 21978
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,11,402.907,21978,0.0851,4.37,-2.06,104.35


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 23976
  custom_metrics: {}
  date: 2021-11-09_03-52-17
  done: false
  episode_len_mean: 105.6
  episode_media: {}
  episode_reward_max: 4.370000000000015
  episode_reward_mean: 0.25850000000000206
  episode_reward_min: -2.0599999999999996
  episodes_this_iter: 18
  episodes_total: 234
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.6625054507028487
          entropy_coeff: 0.009999999999999998
          kl: 0.011115534533092164
          policy_loss: -0.06261468574049928
          total_loss: 0.16776738231558175
          vf_explained_var: 0.5173304080963135
          vf_loss: 0.2547840120182151
    num_agent_steps_sampled: 23976
    num_agent_steps_trained: 23976
    num_steps_sampled: 23976
    num_steps_trained: 23976
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,12,425.654,23976,0.2585,4.37,-2.06,105.6


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 25974
  custom_metrics: {}
  date: 2021-11-09_03-52-40
  done: false
  episode_len_mean: 106.04
  episode_media: {}
  episode_reward_max: 4.520000000000007
  episode_reward_mean: 0.34620000000000234
  episode_reward_min: -2.0599999999999996
  episodes_this_iter: 19
  episodes_total: 253
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.68172238327208
          entropy_coeff: 0.009999999999999998
          kl: 0.012785486105883313
          policy_loss: 0.022145873450097584
          total_loss: 0.20181282230076336
          vf_explained_var: 0.5174775719642639
          vf_loss: 0.2039270740534578
    num_agent_steps_sampled: 25974
    num_agent_steps_trained: 25974
    num_steps_sampled: 25974
    num_steps_trained: 25974
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,13,447.869,25974,0.3462,4.52,-2.06,106.04


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 27972
  custom_metrics: {}
  date: 2021-11-09_03-53-04
  done: false
  episode_len_mean: 105.76
  episode_media: {}
  episode_reward_max: 6.77000000000001
  episode_reward_mean: 0.6131000000000029
  episode_reward_min: -2.0599999999999996
  episodes_this_iter: 20
  episodes_total: 273
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.6799786840166364
          entropy_coeff: 0.009999999999999998
          kl: 0.01297668941320265
          policy_loss: -0.009017946961380186
          total_loss: 0.22819045759914885
          vf_explained_var: 0.4938800036907196
          vf_loss: 0.26141285250584284
    num_agent_steps_sampled: 27972
    num_agent_steps_trained: 27972
    num_steps_sampled: 27972
    num_steps_trained: 27972
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,14,472.372,27972,0.6131,6.77,-2.06,105.76


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 29970
  custom_metrics: {}
  date: 2021-11-09_03-53-27
  done: false
  episode_len_mean: 105.53
  episode_media: {}
  episode_reward_max: 6.77000000000001
  episode_reward_mean: 0.7318000000000027
  episode_reward_min: -1.8500000000000008
  episodes_this_iter: 19
  episodes_total: 292
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.648894767534165
          entropy_coeff: 0.009999999999999998
          kl: 0.012303933054336656
          policy_loss: -0.005256513719047819
          total_loss: 0.2683465646845954
          vf_explained_var: 0.3295733630657196
          vf_loss: 0.2976312365915094
    num_agent_steps_sampled: 29970
    num_agent_steps_trained: 29970
    num_steps_sampled: 29970
    num_steps_trained: 29970
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,15,495.731,29970,0.7318,6.77,-1.85,105.53


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 31968
  custom_metrics: {}
  date: 2021-11-09_03-53-51
  done: false
  episode_len_mean: 104.81
  episode_media: {}
  episode_reward_max: 6.77000000000001
  episode_reward_mean: 0.8752000000000028
  episode_reward_min: -1.8500000000000008
  episodes_this_iter: 20
  episodes_total: 312
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.6457430283228556
          entropy_coeff: 0.009999999999999998
          kl: 0.010650878184077072
          policy_loss: -0.03457434759253547
          total_loss: 0.15997487060903084
          vf_explained_var: 0.5131697058677673
          vf_loss: 0.21887647098019009
    num_agent_steps_sampled: 31968
    num_agent_steps_trained: 31968
    num_steps_sampled: 31968
    num_steps_trained: 31968
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,16,519.312,31968,0.8752,6.77,-1.85,104.81


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 33966
  custom_metrics: {}
  date: 2021-11-09_03-54-15
  done: false
  episode_len_mean: 103.11
  episode_media: {}
  episode_reward_max: 6.77000000000001
  episode_reward_mean: 0.8570000000000025
  episode_reward_min: -1.8500000000000008
  episodes_this_iter: 20
  episodes_total: 332
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.6360355297724407
          entropy_coeff: 0.009999999999999998
          kl: 0.012597189882501674
          policy_loss: -0.023945605435541698
          total_loss: 0.21889139010260503
          vf_explained_var: 0.45043864846229553
          vf_loss: 0.2666779096637453
    num_agent_steps_sampled: 33966
    num_agent_steps_trained: 33966
    num_steps_sampled: 33966
    num_steps_trained: 33966
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,17,543.437,33966,0.857,6.77,-1.85,103.11




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 35964
  custom_metrics: {}
  date: 2021-11-09_03-54-56
  done: false
  episode_len_mean: 100.98
  episode_media: {}
  episode_reward_max: 6.77000000000001
  episode_reward_mean: 0.910700000000003
  episode_reward_min: -1.8300000000000007
  episodes_this_iter: 20
  episodes_total: 352
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.5943632137207757
          entropy_coeff: 0.009999999999999998
          kl: 0.011688008011139131
          policy_loss: -0.012254692223810015
          total_loss: 0.257935588079549
          vf_explained_var: 0.556313157081604
          vf_loss: 0.2937963085160369
    num_agent_steps_sampled: 35964
    num_agent_steps_trained: 35964
    num_steps_sampled: 35964
    num_steps_trained: 35964
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,18,583.899,35964,0.9107,6.77,-1.83,100.98




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 37962
  custom_metrics: {}
  date: 2021-11-09_03-55-40
  done: false
  episode_len_mean: 99.62
  episode_media: {}
  episode_reward_max: 8.310000000000015
  episode_reward_mean: 0.9985000000000034
  episode_reward_min: -1.8300000000000007
  episodes_this_iter: 21
  episodes_total: 373
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.568525317737034
          entropy_coeff: 0.009999999999999998
          kl: 0.017081214762871593
          policy_loss: -0.042039944302468074
          total_loss: 0.3400922513966049
          vf_explained_var: 0.6192892789840698
          vf_loss: 0.40440120767979393
    num_agent_steps_sampled: 37962
    num_agent_steps_trained: 37962
    num_steps_sampled: 37962
    num_steps_trained: 37962
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,19,627.853,37962,0.9985,8.31,-1.83,99.62


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 39960
  custom_metrics: {}
  date: 2021-11-09_03-56-04
  done: false
  episode_len_mean: 100.17
  episode_media: {}
  episode_reward_max: 8.310000000000015
  episode_reward_mean: 1.0676000000000037
  episode_reward_min: -1.910000000000001
  episodes_this_iter: 19
  episodes_total: 392
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.5500413906006587
          entropy_coeff: 0.009999999999999998
          kl: 0.01248535224560672
          policy_loss: 0.02676170947296279
          total_loss: 0.2943708825146868
          vf_explained_var: 0.6623600721359253
          vf_loss: 0.2906125173682258
    num_agent_steps_sampled: 39960
    num_agent_steps_trained: 39960
    num_steps_sampled: 39960
    num_steps_trained: 39960
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,20,652.373,39960,1.0676,8.31,-1.91,100.17


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 41958
  custom_metrics: {}
  date: 2021-11-09_03-56-28
  done: false
  episode_len_mean: 100.63
  episode_media: {}
  episode_reward_max: 8.310000000000015
  episode_reward_mean: 1.107100000000004
  episode_reward_min: -1.910000000000001
  episodes_this_iter: 18
  episodes_total: 410
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.581750552994864
          entropy_coeff: 0.009999999999999998
          kl: 0.013472409887130221
          policy_loss: -0.024986294745689346
          total_loss: 0.2132646860466117
          vf_explained_var: 0.5718298554420471
          vf_loss: 0.26137400400780497
    num_agent_steps_sampled: 41958
    num_agent_steps_trained: 41958
    num_steps_sampled: 41958
    num_steps_trained: 41958
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,21,676.006,41958,1.1071,8.31,-1.91,100.63


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 43956
  custom_metrics: {}
  date: 2021-11-09_03-56-52
  done: false
  episode_len_mean: 101.91
  episode_media: {}
  episode_reward_max: 8.310000000000015
  episode_reward_mean: 1.3439000000000043
  episode_reward_min: -1.910000000000001
  episodes_this_iter: 19
  episodes_total: 429
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.5109811317353024
          entropy_coeff: 0.009999999999999998
          kl: 0.01701473282308446
          policy_loss: -0.023724676721862386
          total_loss: 0.22323595205588
          vf_explained_var: 0.6480076909065247
          vf_loss: 0.26866749337031726
    num_agent_steps_sampled: 43956
    num_agent_steps_trained: 43956
    num_steps_sampled: 43956
    num_steps_trained: 43956
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,22,700.204,43956,1.3439,8.31,-1.91,101.91


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 45954
  custom_metrics: {}
  date: 2021-11-09_03-57-16
  done: false
  episode_len_mean: 104.07
  episode_media: {}
  episode_reward_max: 8.310000000000015
  episode_reward_mean: 1.601700000000006
  episode_reward_min: -1.910000000000001
  episodes_this_iter: 20
  episodes_total: 449
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.5217662334442137
          entropy_coeff: 0.009999999999999998
          kl: 0.018008268100678872
          policy_loss: -0.01586805805563927
          total_loss: 0.2932899527229546
          vf_explained_var: 0.6849979758262634
          vf_loss: 0.33077401866515477
    num_agent_steps_sampled: 45954
    num_agent_steps_trained: 45954
    num_steps_sampled: 45954
    num_steps_trained: 45954
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,23,723.707,45954,1.6017,8.31,-1.91,104.07


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 47952
  custom_metrics: {}
  date: 2021-11-09_03-57-40
  done: false
  episode_len_mean: 106.34
  episode_media: {}
  episode_reward_max: 4.570000000000013
  episode_reward_mean: 1.6404000000000065
  episode_reward_min: -2.2199999999999993
  episodes_this_iter: 19
  episodes_total: 468
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.4911420878909882
          entropy_coeff: 0.009999999999999998
          kl: 0.016402817961071195
          policy_loss: -0.075735267624259
          total_loss: 0.2106581331674187
          vf_explained_var: 0.6794300079345703
          vf_loss: 0.30802425770532516
    num_agent_steps_sampled: 47952
    num_agent_steps_trained: 47952
    num_steps_sampled: 47952
    num_steps_trained: 47952
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,24,747.681,47952,1.6404,4.57,-2.22,106.34


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 49950
  custom_metrics: {}
  date: 2021-11-09_03-58-04
  done: false
  episode_len_mean: 106.79
  episode_media: {}
  episode_reward_max: 5.910000000000016
  episode_reward_mean: 1.621200000000006
  episode_reward_min: -2.2199999999999993
  episodes_this_iter: 17
  episodes_total: 485
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.4893398250852314
          entropy_coeff: 0.009999999999999998
          kl: 0.012489598208693123
          policy_loss: -0.05728182065344992
          total_loss: 0.16634416159774576
          vf_explained_var: 0.6921546459197998
          vf_loss: 0.24602145921616328
    num_agent_steps_sampled: 49950
    num_agent_steps_trained: 49950
    num_steps_sampled: 49950
    num_steps_trained: 49950
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,25,771.545,49950,1.6212,5.91,-2.22,106.79


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 51948
  custom_metrics: {}
  date: 2021-11-09_03-58-27
  done: false
  episode_len_mean: 107.28
  episode_media: {}
  episode_reward_max: 6.080000000000016
  episode_reward_mean: 1.7196000000000062
  episode_reward_min: -2.2199999999999993
  episodes_this_iter: 18
  episodes_total: 503
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.4727191697983515
          entropy_coeff: 0.009999999999999998
          kl: 0.017460293689909652
          policy_loss: -0.003937986635026478
          total_loss: 0.18703427434499775
          vf_explained_var: 0.7565370202064514
          vf_loss: 0.2122073936675276
    num_agent_steps_sampled: 51948
    num_agent_steps_trained: 51948
    num_steps_sampled: 51948
    num_steps_trained: 51948
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,26,794.672,51948,1.7196,6.08,-2.22,107.28


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 53946
  custom_metrics: {}
  date: 2021-11-09_03-58-50
  done: false
  episode_len_mean: 107.97
  episode_media: {}
  episode_reward_max: 6.080000000000016
  episode_reward_mean: 1.8740000000000074
  episode_reward_min: -2.2199999999999993
  episodes_this_iter: 19
  episodes_total: 522
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.459307517324175
          entropy_coeff: 0.009999999999999998
          kl: 0.016371523400949668
          policy_loss: -0.045432296253386
          total_loss: 0.214181174266906
          vf_explained_var: 0.748858630657196
          vf_loss: 0.280932240046206
    num_agent_steps_sampled: 53946
    num_agent_steps_trained: 53946
    num_steps_sampled: 53946
    num_steps_trained: 53946
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,27,817.586,53946,1.874,6.08,-2.22,107.97


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 55944
  custom_metrics: {}
  date: 2021-11-09_03-59-13
  done: false
  episode_len_mean: 108.74
  episode_media: {}
  episode_reward_max: 6.370000000000016
  episode_reward_mean: 1.8824000000000074
  episode_reward_min: -2.2199999999999993
  episodes_this_iter: 18
  episodes_total: 540
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.4099868797120596
          entropy_coeff: 0.009999999999999998
          kl: 0.012732410003548536
          policy_loss: -0.09076143212261654
          total_loss: 0.13610822835138867
          vf_explained_var: 0.7855714559555054
          vf_loss: 0.24842304931510062
    num_agent_steps_sampled: 55944
    num_agent_steps_trained: 55944
    num_steps_sampled: 55944
    num_steps_trained: 55944
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,28,840.782,55944,1.8824,6.37,-2.22,108.74


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 57942
  custom_metrics: {}
  date: 2021-11-09_03-59-38
  done: false
  episode_len_mean: 108.81
  episode_media: {}
  episode_reward_max: 6.370000000000016
  episode_reward_mean: 1.893100000000008
  episode_reward_min: -2.2199999999999993
  episodes_this_iter: 19
  episodes_total: 559
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.4188638596307666
          entropy_coeff: 0.009999999999999998
          kl: 0.020871354639357794
          policy_loss: -0.06565457223249334
          total_loss: 0.4078754675086765
          vf_explained_var: 0.6833553314208984
          vf_loss: 0.49354440413770223
    num_agent_steps_sampled: 57942
    num_agent_steps_trained: 57942
    num_steps_sampled: 57942
    num_steps_trained: 57942
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,29,865.323,57942,1.8931,6.37,-2.22,108.81


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 59940
  custom_metrics: {}
  date: 2021-11-09_04-00-02
  done: false
  episode_len_mean: 107.46
  episode_media: {}
  episode_reward_max: 6.370000000000016
  episode_reward_mean: 2.2161000000000084
  episode_reward_min: -1.8200000000000007
  episodes_this_iter: 19
  episodes_total: 578
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.412447778383891
          entropy_coeff: 0.009999999999999998
          kl: 0.009451503430372562
          policy_loss: 0.002894151991321927
          total_loss: 0.31726864938225063
          vf_explained_var: 0.6476636528968811
          vf_loss: 0.33566352611496336
    num_agent_steps_sampled: 59940
    num_agent_steps_trained: 59940
    num_steps_sampled: 59940
    num_steps_trained: 59940
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,30,889.558,59940,2.2161,6.37,-1.82,107.46


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 61938
  custom_metrics: {}
  date: 2021-11-09_04-00-25
  done: false
  episode_len_mean: 107.86
  episode_media: {}
  episode_reward_max: 6.48000000000001
  episode_reward_mean: 2.410100000000009
  episode_reward_min: -1.7400000000000009
  episodes_this_iter: 19
  episodes_total: 597
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.3897206556229365
          entropy_coeff: 0.009999999999999998
          kl: 0.012921445993584229
          policy_loss: -0.019602791805352484
          total_loss: 0.2863972791958423
          vf_explained_var: 0.5754073262214661
          vf_loss: 0.3260208427906036
    num_agent_steps_sampled: 61938
    num_agent_steps_trained: 61938
    num_steps_sampled: 61938
    num_steps_trained: 61938
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,31,912.647,61938,2.4101,6.48,-1.74,107.86


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 63936
  custom_metrics: {}
  date: 2021-11-09_04-00-49
  done: false
  episode_len_mean: 106.94
  episode_media: {}
  episode_reward_max: 6.48000000000001
  episode_reward_mean: 2.405600000000008
  episode_reward_min: -1.7400000000000009
  episodes_this_iter: 19
  episodes_total: 616
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.377284228234064
          entropy_coeff: 0.009999999999999998
          kl: 0.01021831024262174
          policy_loss: -0.07291017561441376
          total_loss: 0.19551375416063127
          vf_explained_var: 0.7554401159286499
          vf_loss: 0.28913127950259615
    num_agent_steps_sampled: 63936
    num_agent_steps_trained: 63936
    num_steps_sampled: 63936
    num_steps_trained: 63936
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,32,936.408,63936,2.4056,6.48,-1.74,106.94


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 65934
  custom_metrics: {}
  date: 2021-11-09_04-01-12
  done: false
  episode_len_mean: 106.96
  episode_media: {}
  episode_reward_max: 6.48000000000001
  episode_reward_mean: 2.5955000000000092
  episode_reward_min: -1.7400000000000009
  episodes_this_iter: 17
  episodes_total: 633
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.39376582191104
          entropy_coeff: 0.009999999999999998
          kl: 0.013254968866597723
          policy_loss: -0.01611745141091801
          total_loss: 0.2889775233609336
          vf_explained_var: 0.7539677023887634
          vf_loss: 0.3250561406924611
    num_agent_steps_sampled: 65934
    num_agent_steps_trained: 65934
    num_steps_sampled: 65934
    num_steps_trained: 65934
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,33,959.697,65934,2.5955,6.48,-1.74,106.96


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 67932
  custom_metrics: {}
  date: 2021-11-09_04-01-35
  done: false
  episode_len_mean: 109.2
  episode_media: {}
  episode_reward_max: 6.48000000000001
  episode_reward_mean: 2.729900000000009
  episode_reward_min: -0.9700000000000002
  episodes_this_iter: 17
  episodes_total: 650
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.293227023170108
          entropy_coeff: 0.009999999999999998
          kl: 0.013578774451945485
          policy_loss: -0.046204529010823794
          total_loss: 0.2978675869426557
          vf_explained_var: 0.6993898153305054
          vf_loss: 0.36293075049207324
    num_agent_steps_sampled: 67932
    num_agent_steps_trained: 67932
    num_steps_sampled: 67932
    num_steps_trained: 67932
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,34,982.563,67932,2.7299,6.48,-0.97,109.2


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 69930
  custom_metrics: {}
  date: 2021-11-09_04-01-57
  done: false
  episode_len_mean: 110.77
  episode_media: {}
  episode_reward_max: 6.48000000000001
  episode_reward_mean: 2.6163000000000096
  episode_reward_min: -0.9700000000000002
  episodes_this_iter: 18
  episodes_total: 668
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.3268958977290564
          entropy_coeff: 0.009999999999999998
          kl: 0.013107018163233118
          policy_loss: 0.021567413423742566
          total_loss: 0.2388814958610705
          vf_explained_var: 0.6692510843276978
          vf_loss: 0.23665093334303017
    num_agent_steps_sampled: 69930
    num_agent_steps_trained: 69930
    num_steps_sampled: 69930
    num_steps_trained: 69930
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,35,1005.03,69930,2.6163,6.48,-0.97,110.77


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 71928
  custom_metrics: {}
  date: 2021-11-09_04-02-21
  done: false
  episode_len_mean: 112.01
  episode_media: {}
  episode_reward_max: 8.420000000000012
  episode_reward_mean: 2.7084000000000104
  episode_reward_min: -0.9700000000000002
  episodes_this_iter: 17
  episodes_total: 685
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.281095334461757
          entropy_coeff: 0.009999999999999998
          kl: 0.015510114881605086
          policy_loss: -0.003084441034921578
          total_loss: 0.34606841660681226
          vf_explained_var: 0.683704674243927
          vf_loss: 0.3673107778032621
    num_agent_steps_sampled: 71928
    num_agent_steps_trained: 71928
    num_steps_sampled: 71928
    num_steps_trained: 71928
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,36,1028.27,71928,2.7084,8.42,-0.97,112.01


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 73926
  custom_metrics: {}
  date: 2021-11-09_04-02-45
  done: false
  episode_len_mean: 112.19
  episode_media: {}
  episode_reward_max: 8.420000000000012
  episode_reward_mean: 2.70770000000001
  episode_reward_min: -0.9700000000000002
  episodes_this_iter: 18
  episodes_total: 703
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.3526969580423263
          entropy_coeff: 0.009999999999999998
          kl: 0.013992183634863114
          policy_loss: -0.036705996983108065
          total_loss: 0.27665467755425543
          vf_explained_var: 0.6710766553878784
          vf_loss: 0.3326899873358863
    num_agent_steps_sampled: 73926
    num_agent_steps_trained: 73926
    num_steps_sampled: 73926
    num_steps_trained: 73926
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,37,1052.12,73926,2.7077,8.42,-0.97,112.19




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 75924
  custom_metrics: {}
  date: 2021-11-09_04-03-25
  done: false
  episode_len_mean: 109.51
  episode_media: {}
  episode_reward_max: 8.420000000000012
  episode_reward_mean: 2.7229000000000103
  episode_reward_min: -0.9700000000000002
  episodes_this_iter: 22
  episodes_total: 725
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.3127629632041566
          entropy_coeff: 0.009999999999999998
          kl: 0.013500290616714964
          policy_loss: -0.02976976219742071
          total_loss: 0.2662973164891203
          vf_explained_var: 0.7607832551002502
          vf_loss: 0.3151446201971599
    num_agent_steps_sampled: 75924
    num_agent_steps_trained: 75924
    num_steps_sampled: 75924
    num_steps_trained: 75924
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,38,1093.01,75924,2.7229,8.42,-0.97,109.51


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 77922
  custom_metrics: {}
  date: 2021-11-09_04-03-50
  done: false
  episode_len_mean: 108.87
  episode_media: {}
  episode_reward_max: 8.420000000000012
  episode_reward_mean: 2.6598000000000104
  episode_reward_min: -0.9700000000000002
  episodes_this_iter: 18
  episodes_total: 743
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.311455706187657
          entropy_coeff: 0.009999999999999998
          kl: 0.014222167513675523
          policy_loss: -0.00372384355536529
          total_loss: 0.2260424382208536
          vf_explained_var: 0.7831085324287415
          vf_loss: 0.24861418980927694
    num_agent_steps_sampled: 77922
    num_agent_steps_trained: 77922
    num_steps_sampled: 77922
    num_steps_trained: 77922
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,39,1117.15,77922,2.6598,8.42,-0.97,108.87


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 79920
  custom_metrics: {}
  date: 2021-11-09_04-04-12
  done: false
  episode_len_mean: 108.87
  episode_media: {}
  episode_reward_max: 8.420000000000012
  episode_reward_mean: 2.7854000000000116
  episode_reward_min: -0.35000000000000114
  episodes_this_iter: 17
  episodes_total: 760
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.298102289154416
          entropy_coeff: 0.009999999999999998
          kl: 0.01743934236198968
          policy_loss: 0.011286281404041108
          total_loss: 0.2677421232774144
          vf_explained_var: 0.7846932411193848
          vf_loss: 0.27420506278673806
    num_agent_steps_sampled: 79920
    num_agent_steps_trained: 79920
    num_steps_sampled: 79920
    num_steps_trained: 79920
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,40,1139.49,79920,2.7854,8.42,-0.35,108.87


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 81918
  custom_metrics: {}
  date: 2021-11-09_04-04-36
  done: false
  episode_len_mean: 108.32
  episode_media: {}
  episode_reward_max: 8.420000000000012
  episode_reward_mean: 3.030800000000012
  episode_reward_min: -0.33000000000000046
  episodes_this_iter: 18
  episodes_total: 778
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.226231080009824
          entropy_coeff: 0.009999999999999998
          kl: 0.015620006360903304
          policy_loss: -0.04243473658072097
          total_loss: 0.3043724107777789
          vf_explained_var: 0.7521301507949829
          vf_loss: 0.36438345738819666
    num_agent_steps_sampled: 81918
    num_agent_steps_trained: 81918
    num_steps_sampled: 81918
    num_steps_trained: 81918
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,41,1163.7,81918,3.0308,8.42,-0.33,108.32


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 83916
  custom_metrics: {}
  date: 2021-11-09_04-05-00
  done: false
  episode_len_mean: 109.05
  episode_media: {}
  episode_reward_max: 7.600000000000023
  episode_reward_mean: 2.994400000000013
  episode_reward_min: -0.33000000000000046
  episodes_this_iter: 17
  episodes_total: 795
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.2189175764719646
          entropy_coeff: 0.009999999999999998
          kl: 0.012986970949725406
          policy_loss: 0.03015514433029152
          total_loss: 0.30778085527320703
          vf_explained_var: 0.7791894674301147
          vf_loss: 0.29591879386987
    num_agent_steps_sampled: 83916
    num_agent_steps_trained: 83916
    num_steps_sampled: 83916
    num_steps_trained: 83916
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,42,1187.22,83916,2.9944,7.6,-0.33,109.05


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 85914
  custom_metrics: {}
  date: 2021-11-09_04-05-22
  done: false
  episode_len_mean: 110.84
  episode_media: {}
  episode_reward_max: 7.600000000000023
  episode_reward_mean: 3.1828000000000145
  episode_reward_min: -0.33000000000000046
  episodes_this_iter: 17
  episodes_total: 812
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.1526499611990793
          entropy_coeff: 0.009999999999999998
          kl: 0.017916374803837478
          policy_loss: -0.04267719618621327
          total_loss: 0.3701249635645321
          vf_explained_var: 0.7672342658042908
          vf_loss: 0.4289537505024955
    num_agent_steps_sampled: 85914
    num_agent_steps_trained: 85914
    num_steps_sampled: 85914
    num_steps_trained: 85914
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,43,1208.91,85914,3.1828,7.6,-0.33,110.84


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 87912
  custom_metrics: {}
  date: 2021-11-09_04-05-45
  done: false
  episode_len_mean: 114.03
  episode_media: {}
  episode_reward_max: 8.330000000000016
  episode_reward_mean: 3.4751000000000154
  episode_reward_min: -0.33000000000000046
  episodes_this_iter: 17
  episodes_total: 829
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.1995293912433445
          entropy_coeff: 0.009999999999999998
          kl: 0.013846637441295617
          policy_loss: -0.01947798551548095
          total_loss: 0.2840131950520334
          vf_explained_var: 0.7694193720817566
          vf_loss: 0.3213324859028771
    num_agent_steps_sampled: 87912
    num_agent_steps_trained: 87912
    num_steps_sampled: 87912
    num_steps_trained: 87912
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,44,1232.26,87912,3.4751,8.33,-0.33,114.03


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 89910
  custom_metrics: {}
  date: 2021-11-09_04-06-09
  done: false
  episode_len_mean: 114.84
  episode_media: {}
  episode_reward_max: 8.330000000000016
  episode_reward_mean: 3.6984000000000163
  episode_reward_min: -0.23999999999999605
  episodes_this_iter: 18
  episodes_total: 847
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.170015975974855
          entropy_coeff: 0.009999999999999998
          kl: 0.014287459125946695
          policy_loss: -0.03816988336898032
          total_loss: 0.28805350396959556
          vf_explained_var: 0.7426525354385376
          vf_loss: 0.34363730826548167
    num_agent_steps_sampled: 89910
    num_agent_steps_trained: 89910
    num_steps_sampled: 89910
    num_steps_trained: 89910
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,45,1255.84,89910,3.6984,8.33,-0.24,114.84


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 91908
  custom_metrics: {}
  date: 2021-11-09_04-06-31
  done: false
  episode_len_mean: 114.29
  episode_media: {}
  episode_reward_max: 8.330000000000016
  episode_reward_mean: 3.7510000000000154
  episode_reward_min: -0.13999999999999976
  episodes_this_iter: 17
  episodes_total: 864
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.1582416347094946
          entropy_coeff: 0.009999999999999998
          kl: 0.013722692981956719
          policy_loss: -0.05983620931704839
          total_loss: 0.22669465958717325
          vf_explained_var: 0.7798749804496765
          vf_loss: 0.30399647897907667
    num_agent_steps_sampled: 91908
    num_agent_steps_trained: 91908
    num_steps_sampled: 91908
    num_steps_trained: 91908
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,46,1278.49,91908,3.751,8.33,-0.14,114.29


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 93906
  custom_metrics: {}
  date: 2021-11-09_04-06-54
  done: false
  episode_len_mean: 114.41
  episode_media: {}
  episode_reward_max: 8.450000000000017
  episode_reward_mean: 3.8621000000000167
  episode_reward_min: -0.13999999999999976
  episodes_this_iter: 18
  episodes_total: 882
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.0943507256962004
          entropy_coeff: 0.009999999999999998
          kl: 0.014561782639600061
          policy_loss: -0.05155631626645724
          total_loss: 0.21923157526623635
          vf_explained_var: 0.83603435754776
          vf_loss: 0.2873628607463269
    num_agent_steps_sampled: 93906
    num_agent_steps_trained: 93906
    num_steps_sampled: 93906
    num_steps_trained: 93906
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,47,1301.68,93906,3.8621,8.45,-0.14,114.41


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 95904
  custom_metrics: {}
  date: 2021-11-09_04-07-16
  done: false
  episode_len_mean: 115.95
  episode_media: {}
  episode_reward_max: 8.450000000000017
  episode_reward_mean: 3.916100000000017
  episode_reward_min: -0.13999999999999976
  episodes_this_iter: 16
  episodes_total: 898
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.083780614535014
          entropy_coeff: 0.009999999999999998
          kl: 0.015066939813225306
          policy_loss: -0.03770366607322579
          total_loss: 0.2830055733344385
          vf_explained_var: 0.8288822174072266
          vf_loss: 0.337026962070238
    num_agent_steps_sampled: 95904
    num_agent_steps_trained: 95904
    num_steps_sampled: 95904
    num_steps_trained: 95904
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,48,1323.07,95904,3.9161,8.45,-0.14,115.95


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 97902
  custom_metrics: {}
  date: 2021-11-09_04-07-39
  done: false
  episode_len_mean: 115.54
  episode_media: {}
  episode_reward_max: 8.450000000000017
  episode_reward_mean: 4.130700000000017
  episode_reward_min: 0.03999999999999878
  episodes_this_iter: 18
  episodes_total: 916
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.0423541596957615
          entropy_coeff: 0.009999999999999998
          kl: 0.015095847700856643
          policy_loss: -0.03794060459449178
          total_loss: 0.32288464783203036
          vf_explained_var: 0.7973204255104065
          vf_loss: 0.3767200379854157
    num_agent_steps_sampled: 97902
    num_agent_steps_trained: 97902
    num_steps_sampled: 97902
    num_steps_trained: 97902
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,49,1345.85,97902,4.1307,8.45,0.04,115.54


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 99900
  custom_metrics: {}
  date: 2021-11-09_04-08-01
  done: false
  episode_len_mean: 115.28
  episode_media: {}
  episode_reward_max: 8.450000000000017
  episode_reward_mean: 4.230000000000018
  episode_reward_min: 0.03999999999999878
  episodes_this_iter: 18
  episodes_total: 934
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.980233969574883
          entropy_coeff: 0.009999999999999998
          kl: 0.013092515067297021
          policy_loss: -0.04181741908902214
          total_loss: 0.25765813369126545
          vf_explained_var: 0.8662614822387695
          vf_loss: 0.31535013772192455
    num_agent_steps_sampled: 99900
    num_agent_steps_trained: 99900
    num_steps_sampled: 99900
    num_steps_trained: 99900
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,50,1368.3,99900,4.23,8.45,0.04,115.28


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 101898
  custom_metrics: {}
  date: 2021-11-09_04-08-24
  done: false
  episode_len_mean: 115.27
  episode_media: {}
  episode_reward_max: 8.450000000000017
  episode_reward_mean: 4.373400000000018
  episode_reward_min: 0.03999999999999878
  episodes_this_iter: 18
  episodes_total: 952
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.0408249917484467
          entropy_coeff: 0.009999999999999998
          kl: 0.015426255068613825
          policy_loss: -0.007843942656403495
          total_loss: 0.2922683711119351
          vf_explained_var: 0.8638580441474915
          vf_loss: 0.3158926853111812
    num_agent_steps_sampled: 101898
    num_agent_steps_trained: 101898
    num_steps_sampled: 101898
    num_steps_trained: 101898
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,51,1391.16,101898,4.3734,8.45,0.04,115.27


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 103896
  custom_metrics: {}
  date: 2021-11-09_04-08-47
  done: false
  episode_len_mean: 115.5
  episode_media: {}
  episode_reward_max: 8.450000000000017
  episode_reward_mean: 4.47000000000002
  episode_reward_min: 1.730000000000014
  episodes_this_iter: 16
  episodes_total: 968
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.0465084427878972
          entropy_coeff: 0.009999999999999998
          kl: 0.01645807182411797
          policy_loss: -0.03876154479526338
          total_loss: 0.2996173066087067
          vf_explained_var: 0.8239596486091614
          vf_loss: 0.3539065139279479
    num_agent_steps_sampled: 103896
    num_agent_steps_trained: 103896
    num_steps_sampled: 103896
    num_steps_trained: 103896
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,52,1413.78,103896,4.47,8.45,1.73,115.5


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 105894
  custom_metrics: {}
  date: 2021-11-09_04-09-09
  done: false
  episode_len_mean: 115.56
  episode_media: {}
  episode_reward_max: 8.240000000000018
  episode_reward_mean: 4.60660000000002
  episode_reward_min: 1.570000000000009
  episodes_this_iter: 17
  episodes_total: 985
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.0544229836690993
          entropy_coeff: 0.009999999999999998
          kl: 0.011636676008214317
          policy_loss: 0.04100422828147809
          total_loss: 0.3565508488920473
          vf_explained_var: 0.832097589969635
          vf_loss: 0.3325998508504459
    num_agent_steps_sampled: 105894
    num_agent_steps_trained: 105894
    num_steps_sampled: 105894
    num_steps_trained: 105894
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,53,1436.49,105894,4.6066,8.24,1.57,115.56


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 107892
  custom_metrics: {}
  date: 2021-11-09_04-09-33
  done: false
  episode_len_mean: 114.9
  episode_media: {}
  episode_reward_max: 10.280000000000019
  episode_reward_mean: 4.84730000000002
  episode_reward_min: 1.3900000000000141
  episodes_this_iter: 18
  episodes_total: 1003
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.0397298801512944
          entropy_coeff: 0.009999999999999998
          kl: 0.012172719894203337
          policy_loss: -0.06038928652803103
          total_loss: 0.30116976276040075
          vf_explained_var: 0.8240649104118347
          vf_loss: 0.37830453188646407
    num_agent_steps_sampled: 107892
    num_agent_steps_trained: 107892
    num_steps_sampled: 107892
    num_steps_trained: 107892
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,54,1459.72,107892,4.8473,10.28,1.39,114.9


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 109890
  custom_metrics: {}
  date: 2021-11-09_04-09-56
  done: false
  episode_len_mean: 113.83
  episode_media: {}
  episode_reward_max: 10.280000000000019
  episode_reward_mean: 4.79940000000002
  episode_reward_min: 1.3900000000000141
  episodes_this_iter: 18
  episodes_total: 1021
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.0767291250683013
          entropy_coeff: 0.009999999999999998
          kl: 0.015660367380408772
          policy_loss: -0.03878968993113154
          total_loss: 0.33202772974258377
          vf_explained_var: 0.8236551880836487
          vf_loss: 0.3868866000147093
    num_agent_steps_sampled: 109890
    num_agent_steps_trained: 109890
    num_steps_sampled: 109890
    num_steps_trained: 109890
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,55,1482.85,109890,4.7994,10.28,1.39,113.83


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 111888
  custom_metrics: {}
  date: 2021-11-09_04-10-20
  done: false
  episode_len_mean: 113.25
  episode_media: {}
  episode_reward_max: 10.280000000000019
  episode_reward_mean: 4.82620000000002
  episode_reward_min: 1.3900000000000141
  episodes_this_iter: 19
  episodes_total: 1040
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.0977039399601165
          entropy_coeff: 0.009999999999999998
          kl: 0.016980812885549933
          policy_loss: -0.019523165472561405
          total_loss: 0.427362242460783
          vf_explained_var: 0.8075976967811584
          vf_loss: 0.46276820458116985
    num_agent_steps_sampled: 111888
    num_agent_steps_trained: 111888
    num_steps_sampled: 111888
    num_steps_trained: 111888
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,56,1507.18,111888,4.8262,10.28,1.39,113.25


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 113886
  custom_metrics: {}
  date: 2021-11-09_04-10-45
  done: false
  episode_len_mean: 110.55
  episode_media: {}
  episode_reward_max: 10.280000000000019
  episode_reward_mean: 4.885800000000018
  episode_reward_min: 1.3900000000000141
  episodes_this_iter: 19
  episodes_total: 1059
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.07346738917487
          entropy_coeff: 0.009999999999999998
          kl: 0.014028893750474352
          policy_loss: -0.03994331432595139
          total_loss: 0.3133088690184411
          vf_explained_var: 0.8264788389205933
          vf_loss: 0.36977819076606205
    num_agent_steps_sampled: 113886
    num_agent_steps_trained: 113886
    num_steps_sampled: 113886
    num_steps_trained: 113886
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,57,1531.89,113886,4.8858,10.28,1.39,110.55




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 115884
  custom_metrics: {}
  date: 2021-11-09_04-11-37
  done: false
  episode_len_mean: 109.04
  episode_media: {}
  episode_reward_max: 10.280000000000019
  episode_reward_mean: 4.791900000000018
  episode_reward_min: -0.05
  episodes_this_iter: 19
  episodes_total: 1078
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.0273526753698077
          entropy_coeff: 0.009999999999999998
          kl: 0.021859155400320622
          policy_loss: -0.05940753643711408
          total_loss: 0.28535945885592984
          vf_explained_var: 0.848068118095398
          vf_loss: 0.3584827768660727
    num_agent_steps_sampled: 115884
    num_agent_steps_trained: 115884
    num_steps_sampled: 115884
    num_steps_trained: 115884
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,58,1583.85,115884,4.7919,10.28,-0.05,109.04


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 117882
  custom_metrics: {}
  date: 2021-11-09_04-12-04
  done: false
  episode_len_mean: 108.39
  episode_media: {}
  episode_reward_max: 10.280000000000019
  episode_reward_mean: 4.8175000000000185
  episode_reward_min: -0.05
  episodes_this_iter: 18
  episodes_total: 1096
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.0026421047392344
          entropy_coeff: 0.009999999999999998
          kl: 0.014944803033317126
          policy_loss: -0.08165990062767552
          total_loss: 0.2779701093477862
          vf_explained_var: 0.8640903234481812
          vf_loss: 0.3729312678532941
    num_agent_steps_sampled: 117882
    num_agent_steps_trained: 117882
    num_steps_sampled: 117882
    num_steps_trained: 117882
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,59,1610.44,117882,4.8175,10.28,-0.05,108.39


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 119880
  custom_metrics: {}
  date: 2021-11-09_04-12-29
  done: false
  episode_len_mean: 107.25
  episode_media: {}
  episode_reward_max: 8.410000000000016
  episode_reward_mean: 4.892300000000019
  episode_reward_min: -0.05
  episodes_this_iter: 18
  episodes_total: 1114
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8903589174861
          entropy_coeff: 0.009999999999999998
          kl: 0.013995832880996468
          policy_loss: -0.051033693055311836
          total_loss: 0.43096723921951796
          vf_explained_var: 0.8192477226257324
          vf_loss: 0.49460639726547967
    num_agent_steps_sampled: 119880
    num_agent_steps_trained: 119880
    num_steps_sampled: 119880
    num_steps_trained: 119880
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,60,1636.12,119880,4.8923,8.41,-0.05,107.25


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 121878
  custom_metrics: {}
  date: 2021-11-09_04-12-54
  done: false
  episode_len_mean: 107.19
  episode_media: {}
  episode_reward_max: 8.350000000000016
  episode_reward_mean: 4.927700000000018
  episode_reward_min: -0.05
  episodes_this_iter: 19
  episodes_total: 1133
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.960709465685345
          entropy_coeff: 0.009999999999999998
          kl: 0.010383460468113613
          policy_loss: -0.04374054469877765
          total_loss: 0.2429510053424608
          vf_explained_var: 0.888956606388092
          vf_loss: 0.30162608847022054
    num_agent_steps_sampled: 121878
    num_agent_steps_trained: 121878
    num_steps_sampled: 121878
    num_steps_trained: 121878
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,61,1661.02,121878,4.9277,8.35,-0.05,107.19


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 123876
  custom_metrics: {}
  date: 2021-11-09_04-13-19
  done: false
  episode_len_mean: 107.87
  episode_media: {}
  episode_reward_max: 8.040000000000019
  episode_reward_mean: 4.909100000000018
  episode_reward_min: -0.05
  episodes_this_iter: 18
  episodes_total: 1151
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8635292830921355
          entropy_coeff: 0.009999999999999998
          kl: 0.012094896383081066
          policy_loss: 0.03357324901790846
          total_loss: 0.3739569134300663
          vf_explained_var: 0.8956397175788879
          vf_loss: 0.3535762541350864
    num_agent_steps_sampled: 123876
    num_agent_steps_trained: 123876
    num_steps_sampled: 123876
    num_steps_trained: 123876
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,62,1686.15,123876,4.9091,8.04,-0.05,107.87


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 125874
  custom_metrics: {}
  date: 2021-11-09_04-13-44
  done: false
  episode_len_mean: 109.2
  episode_media: {}
  episode_reward_max: 8.040000000000019
  episode_reward_mean: 4.895500000000019
  episode_reward_min: 1.6000000000000165
  episodes_this_iter: 19
  episodes_total: 1170
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.013200106507256
          entropy_coeff: 0.009999999999999998
          kl: 0.014413913142763506
          policy_loss: -0.004265573097481614
          total_loss: 0.31513489628476754
          vf_explained_var: 0.8737975358963013
          vf_loss: 0.33304621023791176
    num_agent_steps_sampled: 125874
    num_agent_steps_trained: 125874
    num_steps_sampled: 125874
    num_steps_trained: 125874


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,63,1710.74,125874,4.8955,8.04,1.6,109.2


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 127872
  custom_metrics: {}
  date: 2021-11-09_04-14-09
  done: false
  episode_len_mean: 108.85
  episode_media: {}
  episode_reward_max: 7.900000000000022
  episode_reward_mean: 5.000200000000018
  episode_reward_min: 1.6000000000000165
  episodes_this_iter: 18
  episodes_total: 1188
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.9472735251699176
          entropy_coeff: 0.009999999999999998
          kl: 0.013782804326844983
          policy_loss: 0.0015428579368052028
          total_loss: 0.30344329675038656
          vf_explained_var: 0.8933245539665222
          vf_loss: 0.31517091123830704
    num_agent_steps_sampled: 127872
    num_agent_steps_trained: 127872
    num_steps_sampled: 127872
    num_steps_trained: 12787

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,64,1735.61,127872,5.0002,7.9,1.6,108.85


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 129870
  custom_metrics: {}
  date: 2021-11-09_04-14-32
  done: false
  episode_len_mean: 109.13
  episode_media: {}
  episode_reward_max: 7.900000000000022
  episode_reward_mean: 4.9543000000000195
  episode_reward_min: 1.6000000000000165
  episodes_this_iter: 17
  episodes_total: 1205
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.828952982312157
          entropy_coeff: 0.009999999999999998
          kl: 0.010641088243993006
          policy_loss: -0.012565820504512106
          total_loss: 0.3814888860143366
          vf_explained_var: 0.8183948993682861
          vf_loss: 0.407555746677376
    num_agent_steps_sampled: 129870
    num_agent_steps_trained: 129870
    num_steps_sampled: 129870
    num_steps_trained: 129870
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,65,1759.16,129870,4.9543,7.9,1.6,109.13


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 131868
  custom_metrics: {}
  date: 2021-11-09_04-14-57
  done: false
  episode_len_mean: 110.31
  episode_media: {}
  episode_reward_max: 7.900000000000022
  episode_reward_mean: 4.926800000000019
  episode_reward_min: 1.570000000000012
  episodes_this_iter: 18
  episodes_total: 1223
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8938475324994042
          entropy_coeff: 0.009999999999999998
          kl: 0.01230171263967498
          policy_loss: -0.06475198597514203
          total_loss: 0.3802946381270885
          vf_explained_var: 0.8204451203346252
          vf_loss: 0.45844933241605756
    num_agent_steps_sampled: 131868
    num_agent_steps_trained: 131868
    num_steps_sampled: 131868
    num_steps_trained: 131868
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,66,1783.38,131868,4.9268,7.9,1.57,110.31


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 133866
  custom_metrics: {}
  date: 2021-11-09_04-15-20
  done: false
  episode_len_mean: 111.76
  episode_media: {}
  episode_reward_max: 7.900000000000022
  episode_reward_mean: 5.04430000000002
  episode_reward_min: 1.570000000000012
  episodes_this_iter: 17
  episodes_total: 1240
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.0160211636906578
          entropy_coeff: 0.009999999999999998
          kl: 0.011078055796955854
          policy_loss: -0.010015275329351425
          total_loss: 0.3438279920213279
          vf_explained_var: 0.8497719764709473
          vf_loss: 0.3690183561472666
    num_agent_steps_sampled: 133866
    num_agent_steps_trained: 133866
    num_steps_sampled: 133866
    num_steps_trained: 133866
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,67,1807.05,133866,5.0443,7.9,1.57,111.76


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 135864
  custom_metrics: {}
  date: 2021-11-09_04-15-44
  done: false
  episode_len_mean: 113.26
  episode_media: {}
  episode_reward_max: 7.900000000000016
  episode_reward_mean: 5.0179000000000205
  episode_reward_min: 1.570000000000012
  episodes_this_iter: 17
  episodes_total: 1257
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.0293604680470057
          entropy_coeff: 0.009999999999999998
          kl: 0.018227676689552712
          policy_loss: -0.06041483244903031
          total_loss: 0.3004311767628505
          vf_explained_var: 0.8470023274421692
          vf_loss: 0.37293715750177703
    num_agent_steps_sampled: 135864
    num_agent_steps_trained: 135864
    num_steps_sampled: 135864
    num_steps_trained: 135864


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,68,1831.06,135864,5.0179,7.9,1.57,113.26


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 137862
  custom_metrics: {}
  date: 2021-11-09_04-16-08
  done: false
  episode_len_mean: 114.01
  episode_media: {}
  episode_reward_max: 7.870000000000019
  episode_reward_mean: 5.00680000000002
  episode_reward_min: 1.570000000000012
  episodes_this_iter: 18
  episodes_total: 1275
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.9804128510611398
          entropy_coeff: 0.009999999999999998
          kl: 0.011515350530229286
          policy_loss: -0.030470114875407447
          total_loss: 0.30194798700866243
          vf_explained_var: 0.8498184680938721
          vf_loss: 0.3470403212166968
    num_agent_steps_sampled: 137862
    num_agent_steps_trained: 137862
    num_steps_sampled: 137862
    num_steps_trained: 137862
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,69,1854.68,137862,5.0068,7.87,1.57,114.01


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 139860
  custom_metrics: {}
  date: 2021-11-09_04-16-31
  done: false
  episode_len_mean: 115.65
  episode_media: {}
  episode_reward_max: 7.830000000000023
  episode_reward_mean: 4.92420000000002
  episode_reward_min: 1.570000000000012
  episodes_this_iter: 17
  episodes_total: 1292
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8789924144744874
          entropy_coeff: 0.009999999999999998
          kl: 0.014879965594637617
          policy_loss: -0.046302712726451103
          total_loss: 0.32321352415851184
          vf_explained_var: 0.8398255705833435
          vf_loss: 0.38161017901840666
    num_agent_steps_sampled: 139860
    num_agent_steps_trained: 139860
    num_steps_sampled: 139860
    num_steps_trained: 139860


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,70,1877.2,139860,4.9242,7.83,1.57,115.65


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 141858
  custom_metrics: {}
  date: 2021-11-09_04-16-55
  done: false
  episode_len_mean: 115.54
  episode_media: {}
  episode_reward_max: 10.18000000000002
  episode_reward_mean: 4.987100000000021
  episode_reward_min: 1.570000000000012
  episodes_this_iter: 17
  episodes_total: 1309
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.9164436311948867
          entropy_coeff: 0.009999999999999998
          kl: 0.013615726263435993
          policy_loss: -0.042864624889833586
          total_loss: 0.31201879016700246
          vf_explained_var: 0.8747044205665588
          vf_loss: 0.36792077649207344
    num_agent_steps_sampled: 141858
    num_agent_steps_trained: 141858
    num_steps_sampled: 141858
    num_steps_trained: 141858

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,71,1901.73,141858,4.9871,10.18,1.57,115.54


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 143856
  custom_metrics: {}
  date: 2021-11-09_04-17-18
  done: false
  episode_len_mean: 116.42
  episode_media: {}
  episode_reward_max: 10.18000000000002
  episode_reward_mean: 5.1078000000000205
  episode_reward_min: 1.8000000000000123
  episodes_this_iter: 17
  episodes_total: 1326
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.9106936789694287
          entropy_coeff: 0.009999999999999998
          kl: 0.012145190117180875
          policy_loss: -0.01825353933409566
          total_loss: 0.3224677793504227
          vf_explained_var: 0.8755363821983337
          vf_loss: 0.3543629160949162
    num_agent_steps_sampled: 143856
    num_agent_steps_trained: 143856
    num_steps_sampled: 143856
    num_steps_trained: 143856


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,72,1924.43,143856,5.1078,10.18,1.8,116.42


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 145854
  custom_metrics: {}
  date: 2021-11-09_04-17-42
  done: false
  episode_len_mean: 115.64
  episode_media: {}
  episode_reward_max: 10.420000000000016
  episode_reward_mean: 5.249400000000021
  episode_reward_min: 1.8000000000000123
  episodes_this_iter: 18
  episodes_total: 1344
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.903351998896826
          entropy_coeff: 0.009999999999999998
          kl: 0.01391000803420343
          policy_loss: -0.04799706087935539
          total_loss: 0.311358475569813
          vf_explained_var: 0.8766356110572815
          vf_loss: 0.37212955057621
    num_agent_steps_sampled: 145854
    num_agent_steps_trained: 145854
    num_steps_sampled: 145854
    num_steps_trained: 145854
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,73,1948.2,145854,5.2494,10.42,1.8,115.64


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 147852
  custom_metrics: {}
  date: 2021-11-09_04-18-05
  done: false
  episode_len_mean: 115.52
  episode_media: {}
  episode_reward_max: 10.420000000000016
  episode_reward_mean: 5.4279000000000215
  episode_reward_min: 1.8000000000000123
  episodes_this_iter: 18
  episodes_total: 1362
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.96846315463384
          entropy_coeff: 0.009999999999999998
          kl: 0.01326571381900922
          policy_loss: -0.021381826858435357
          total_loss: 0.2421733227159296
          vf_explained_var: 0.9032809138298035
          vf_loss: 0.277270209079697
    num_agent_steps_sampled: 147852
    num_agent_steps_trained: 147852
    num_steps_sampled: 147852
    num_steps_trained: 147852
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,74,1971.75,147852,5.4279,10.42,1.8,115.52


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 149850
  custom_metrics: {}
  date: 2021-11-09_04-18-28
  done: false
  episode_len_mean: 116.04
  episode_media: {}
  episode_reward_max: 10.420000000000016
  episode_reward_mean: 5.70220000000002
  episode_reward_min: 1.8000000000000123
  episodes_this_iter: 16
  episodes_total: 1378
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8143053083192735
          entropy_coeff: 0.009999999999999998
          kl: 0.020695947257338723
          policy_loss: -0.047476150663126084
          total_loss: 0.47105011677458175
          vf_explained_var: 0.8415629267692566
          vf_loss: 0.5273561454245023
    num_agent_steps_sampled: 149850
    num_agent_steps_trained: 149850
    num_steps_sampled: 149850
    num_steps_trained: 149850

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,75,1994.64,149850,5.7022,10.42,1.8,116.04


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 151848
  custom_metrics: {}
  date: 2021-11-09_04-18-51
  done: false
  episode_len_mean: 115.61
  episode_media: {}
  episode_reward_max: 10.420000000000016
  episode_reward_mean: 5.842300000000019
  episode_reward_min: 1.8000000000000123
  episodes_this_iter: 17
  episodes_total: 1395
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.9220567567007882
          entropy_coeff: 0.009999999999999998
          kl: 0.01209161398060004
          policy_loss: -0.027507350647023747
          total_loss: 0.3808688119586025
          vf_explained_var: 0.8774357438087463
          vf_loss: 0.4194348907896451
    num_agent_steps_sampled: 151848
    num_agent_steps_trained: 151848
    num_steps_sampled: 151848
    num_steps_trained: 151848
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,76,2017.61,151848,5.8423,10.42,1.8,115.61


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 153846
  custom_metrics: {}
  date: 2021-11-09_04-19-15
  done: false
  episode_len_mean: 114.92
  episode_media: {}
  episode_reward_max: 10.420000000000016
  episode_reward_mean: 5.8773000000000195
  episode_reward_min: 2.0600000000000103
  episodes_this_iter: 18
  episodes_total: 1413
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.8457779197465807
          entropy_coeff: 0.009999999999999998
          kl: 0.009241003197188772
          policy_loss: -0.03437241523393563
          total_loss: 0.4119122295507363
          vf_explained_var: 0.8619800209999084
          vf_loss: 0.458504747847716
    num_agent_steps_sampled: 153846
    num_agent_steps_trained: 153846
    num_steps_sampled: 153846
    num_steps_trained: 153846
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,77,2041.7,153846,5.8773,10.42,2.06,114.92




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 155844
  custom_metrics: {}
  date: 2021-11-09_04-20-11
  done: false
  episode_len_mean: 112.03
  episode_media: {}
  episode_reward_max: 10.160000000000021
  episode_reward_mean: 5.743700000000019
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 1434
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.8532118621326628
          entropy_coeff: 0.009999999999999998
          kl: 0.01175780376861744
          policy_loss: -0.023775673976966314
          total_loss: 0.5064218517392873
          vf_explained_var: 0.8293305039405823
          vf_loss: 0.540793121180364
    num_agent_steps_sampled: 155844
    num_agent_steps_trained: 155844
    num_steps_sampled: 155844
    num_steps_trained: 155844
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,78,2096.97,155844,5.7437,10.16,-0.06,112.03


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 157842
  custom_metrics: {}
  date: 2021-11-09_04-20-36
  done: false
  episode_len_mean: 110.94
  episode_media: {}
  episode_reward_max: 10.160000000000021
  episode_reward_mean: 5.72670000000002
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 18
  episodes_total: 1452
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.76231594369525
          entropy_coeff: 0.009999999999999998
          kl: 0.016832259765374013
          policy_loss: -0.043876371599201644
          total_loss: 0.3743671470099971
          vf_explained_var: 0.8798216581344604
          vf_loss: 0.42450490054630097
    num_agent_steps_sampled: 157842
    num_agent_steps_trained: 157842
    num_steps_sampled: 157842
    num_steps_trained: 157842
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,79,2122.02,157842,5.7267,10.16,-0.06,110.94


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 159840
  custom_metrics: {}
  date: 2021-11-09_04-21-00
  done: false
  episode_len_mean: 110.39
  episode_media: {}
  episode_reward_max: 10.160000000000021
  episode_reward_mean: 5.669200000000018
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 18
  episodes_total: 1470
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.7771896765345618
          entropy_coeff: 0.009999999999999998
          kl: 0.011050993813192675
          policy_loss: -0.06638943600867475
          total_loss: 0.2971076707355678
          vf_explained_var: 0.8742092847824097
          vf_loss: 0.3738095837689581
    num_agent_steps_sampled: 159840
    num_agent_steps_trained: 159840
    num_steps_sampled: 159840
    num_steps_trained: 159840
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,80,2146.01,159840,5.6692,10.16,-0.06,110.39


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 161838
  custom_metrics: {}
  date: 2021-11-09_04-21-25
  done: false
  episode_len_mean: 107.64
  episode_media: {}
  episode_reward_max: 10.080000000000014
  episode_reward_mean: 5.5858000000000185
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 1489
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.8339229549680438
          entropy_coeff: 0.009999999999999998
          kl: 0.009431450206601763
          policy_loss: -0.07575519437946024
          total_loss: 0.27123939231747674
          vf_explained_var: 0.884414792060852
          vf_loss: 0.35896758621647246
    num_agent_steps_sampled: 161838
    num_agent_steps_trained: 161838
    num_steps_sampled: 161838
    num_steps_trained: 161838
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,81,2171.46,161838,5.5858,10.08,-0.06,107.64


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 163836
  custom_metrics: {}
  date: 2021-11-09_04-21-49
  done: false
  episode_len_mean: 106.9
  episode_media: {}
  episode_reward_max: 10.080000000000014
  episode_reward_mean: 5.583100000000018
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 18
  episodes_total: 1507
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.8141688290096465
          entropy_coeff: 0.009999999999999998
          kl: 0.010941455899964508
          policy_loss: -0.021816896008593694
          total_loss: 0.2828358444518277
          vf_explained_var: 0.8886467814445496
          vf_loss: 0.31540894558032356
    num_agent_steps_sampled: 163836
    num_agent_steps_trained: 163836
    num_steps_sampled: 163836
    num_steps_trained: 163836
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,82,2195.51,163836,5.5831,10.08,-0.06,106.9


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 165834
  custom_metrics: {}
  date: 2021-11-09_04-22-12
  done: false
  episode_len_mean: 108.63
  episode_media: {}
  episode_reward_max: 10.250000000000016
  episode_reward_mean: 5.667900000000018
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 17
  episodes_total: 1524
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.7876731089183262
          entropy_coeff: 0.009999999999999998
          kl: 0.011638045805572973
          policy_loss: -0.04608763637287276
          total_loss: 0.3133229128750307
          vf_explained_var: 0.8746377229690552
          vf_loss: 0.36943159656865254
    num_agent_steps_sampled: 165834
    num_agent_steps_trained: 165834
    num_steps_sampled: 165834
    num_steps_trained: 165834
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,83,2218.03,165834,5.6679,10.25,-0.06,108.63


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 167832
  custom_metrics: {}
  date: 2021-11-09_04-22-35
  done: false
  episode_len_mean: 110.16
  episode_media: {}
  episode_reward_max: 10.250000000000016
  episode_reward_mean: 5.7283000000000195
  episode_reward_min: 2.2500000000000204
  episodes_this_iter: 19
  episodes_total: 1543
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.7229400089808873
          entropy_coeff: 0.009999999999999998
          kl: 0.012435269757274575
          policy_loss: -0.0392530134391217
          total_loss: 0.3537826274388603
          vf_explained_var: 0.8790363073348999
          vf_loss: 0.4018712347462064
    num_agent_steps_sampled: 167832
    num_agent_steps_trained: 167832
    num_steps_sampled: 167832
    num_steps_trained: 167832
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,84,2240.98,167832,5.7283,10.25,2.25,110.16


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 169830
  custom_metrics: {}
  date: 2021-11-09_04-22-58
  done: false
  episode_len_mean: 110.46
  episode_media: {}
  episode_reward_max: 10.250000000000016
  episode_reward_mean: 5.71720000000002
  episode_reward_min: 1.8400000000000185
  episodes_this_iter: 18
  episodes_total: 1561
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.8207836168152944
          entropy_coeff: 0.009999999999999998
          kl: 0.012656994898510477
          policy_loss: -0.012417852754394214
          total_loss: 0.31548715606331823
          vf_explained_var: 0.8949904441833496
          vf_loss: 0.3375693725688117
    num_agent_steps_sampled: 169830
    num_agent_steps_trained: 169830
    num_steps_sampled: 169830
    num_steps_trained: 169830
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,85,2264.39,169830,5.7172,10.25,1.84,110.46


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 171828
  custom_metrics: {}
  date: 2021-11-09_04-23-22
  done: false
  episode_len_mean: 111.14
  episode_media: {}
  episode_reward_max: 10.250000000000016
  episode_reward_mean: 5.73140000000002
  episode_reward_min: 1.8400000000000185
  episodes_this_iter: 18
  episodes_total: 1579
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.790547373748961
          entropy_coeff: 0.009999999999999998
          kl: 0.012573714175199063
          policy_loss: -0.05332395927537055
          total_loss: 0.3645513403717251
          vf_explained_var: 0.8235441446304321
          vf_loss: 0.42729351768891016
    num_agent_steps_sampled: 171828
    num_agent_steps_trained: 171828
    num_steps_sampled: 171828
    num_steps_trained: 171828
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,86,2287.74,171828,5.7314,10.25,1.84,111.14


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 173826
  custom_metrics: {}
  date: 2021-11-09_04-23-45
  done: false
  episode_len_mean: 111.84
  episode_media: {}
  episode_reward_max: 10.250000000000016
  episode_reward_mean: 5.692600000000019
  episode_reward_min: 1.8400000000000185
  episodes_this_iter: 18
  episodes_total: 1597
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.809579644884382
          entropy_coeff: 0.009999999999999998
          kl: 0.010180890991772436
          policy_loss: -0.05093351911221232
          total_loss: 0.27228652675236975
          vf_explained_var: 0.8795090317726135
          vf_loss: 0.334443742391609
    num_agent_steps_sampled: 173826
    num_agent_steps_trained: 173826
    num_steps_sampled: 173826
    num_steps_trained: 173826
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,87,2311.4,173826,5.6926,10.25,1.84,111.84


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 175824
  custom_metrics: {}
  date: 2021-11-09_04-24-09
  done: false
  episode_len_mean: 111.42
  episode_media: {}
  episode_reward_max: 10.250000000000016
  episode_reward_mean: 5.898100000000017
  episode_reward_min: 1.8400000000000185
  episodes_this_iter: 18
  episodes_total: 1615
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.6550624115126473
          entropy_coeff: 0.009999999999999998
          kl: 0.0164534151201388
          policy_loss: -0.041040054034619106
          total_loss: 0.3958463570546536
          vf_explained_var: 0.8585960865020752
          vf_loss: 0.44233097760450274
    num_agent_steps_sampled: 175824
    num_agent_steps_trained: 175824
    num_steps_sampled: 175824
    num_steps_trained: 175824
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,88,2335.07,175824,5.8981,10.25,1.84,111.42


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 177822
  custom_metrics: {}
  date: 2021-11-09_04-24-33
  done: false
  episode_len_mean: 109.9
  episode_media: {}
  episode_reward_max: 10.090000000000016
  episode_reward_mean: 5.977600000000019
  episode_reward_min: 1.8400000000000185
  episodes_this_iter: 18
  episodes_total: 1633
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.618393365542094
          entropy_coeff: 0.009999999999999998
          kl: 0.010625330954234086
          policy_loss: -0.023578717027391706
          total_loss: 0.3476364024338268
          vf_explained_var: 0.8981428146362305
          vf_loss: 0.3802269560595353
    num_agent_steps_sampled: 177822
    num_agent_steps_trained: 177822
    num_steps_sampled: 177822
    num_steps_trained: 177822
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,89,2359.15,177822,5.9776,10.09,1.84,109.9


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 179820
  custom_metrics: {}
  date: 2021-11-09_04-24-57
  done: false
  episode_len_mean: 108.85
  episode_media: {}
  episode_reward_max: 10.080000000000016
  episode_reward_mean: 5.945200000000018
  episode_reward_min: 1.8400000000000185
  episodes_this_iter: 18
  episodes_total: 1651
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.728463952314286
          entropy_coeff: 0.009999999999999998
          kl: 0.011684818357698244
          policy_loss: -0.0203794097616559
          total_loss: 0.3776203070456783
          vf_explained_var: 0.9000752568244934
          vf_loss: 0.4073971002584412
    num_agent_steps_sampled: 179820
    num_agent_steps_trained: 179820
    num_steps_sampled: 179820
    num_steps_trained: 179820
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,90,2383.22,179820,5.9452,10.08,1.84,108.85


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 181818
  custom_metrics: {}
  date: 2021-11-09_04-25-21
  done: false
  episode_len_mean: 109.38
  episode_media: {}
  episode_reward_max: 10.080000000000016
  episode_reward_mean: 6.012500000000021
  episode_reward_min: 2.460000000000017
  episodes_this_iter: 18
  episodes_total: 1669
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.8342530948775155
          entropy_coeff: 0.009999999999999998
          kl: 0.01351302294025476
          policy_loss: -0.023620128667070753
          total_loss: 0.2805711099523164
          vf_explained_var: 0.8958092927932739
          vf_loss: 0.31341247860164867
    num_agent_steps_sampled: 181818
    num_agent_steps_trained: 181818
    num_steps_sampled: 181818
    num_steps_trained: 181818
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,91,2406.47,181818,6.0125,10.08,2.46,109.38


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 183816
  custom_metrics: {}
  date: 2021-11-09_04-25-45
  done: false
  episode_len_mean: 108.79
  episode_media: {}
  episode_reward_max: 10.610000000000014
  episode_reward_mean: 6.182900000000019
  episode_reward_min: 2.460000000000017
  episodes_this_iter: 19
  episodes_total: 1688
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.7722322015535263
          entropy_coeff: 0.009999999999999998
          kl: 0.009297168857482876
          policy_loss: -0.04463445186792385
          total_loss: 0.23927153745400054
          vf_explained_var: 0.9158617258071899
          vf_loss: 0.29535272014992575
    num_agent_steps_sampled: 183816
    num_agent_steps_trained: 183816
    num_steps_sampled: 183816
    num_steps_trained: 183816
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,92,2431.26,183816,6.1829,10.61,2.46,108.79


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 185814
  custom_metrics: {}
  date: 2021-11-09_04-26-11
  done: false
  episode_len_mean: 107.08
  episode_media: {}
  episode_reward_max: 10.610000000000014
  episode_reward_mean: 6.197600000000018
  episode_reward_min: 2.780000000000019
  episodes_this_iter: 20
  episodes_total: 1708
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.7738579846563793
          entropy_coeff: 0.009999999999999998
          kl: 0.011221774417160458
          policy_loss: -0.012827613843338831
          total_loss: 0.29954378788936
          vf_explained_var: 0.9023517370223999
          vf_loss: 0.32253528322492325
    num_agent_steps_sampled: 185814
    num_agent_steps_trained: 185814
    num_steps_sampled: 185814
    num_steps_trained: 185814
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,93,2456.73,185814,6.1976,10.61,2.78,107.08


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 187812
  custom_metrics: {}
  date: 2021-11-09_04-26-36
  done: false
  episode_len_mean: 106.51
  episode_media: {}
  episode_reward_max: 10.610000000000014
  episode_reward_mean: 5.856700000000019
  episode_reward_min: 2.3700000000000117
  episodes_this_iter: 19
  episodes_total: 1727
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.9239513851347423
          entropy_coeff: 0.009999999999999998
          kl: 0.007954350058284541
          policy_loss: -0.05904413124635106
          total_loss: 0.10086116284309399
          vf_explained_var: 0.9403129816055298
          vf_loss: 0.17377562036826497
    num_agent_steps_sampled: 187812
    num_agent_steps_trained: 187812
    num_steps_sampled: 187812
    num_steps_trained: 187812
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,94,2481.79,187812,5.8567,10.61,2.37,106.51


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 189810
  custom_metrics: {}
  date: 2021-11-09_04-27-01
  done: false
  episode_len_mean: 106.28
  episode_media: {}
  episode_reward_max: 10.610000000000014
  episode_reward_mean: 5.783400000000018
  episode_reward_min: 2.32000000000002
  episodes_this_iter: 19
  episodes_total: 1746
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.8037513182276772
          entropy_coeff: 0.009999999999999998
          kl: 0.012631335315922953
          policy_loss: -0.03671515140505064
          total_loss: 0.3294037643642653
          vf_explained_var: 0.8837664723396301
          vf_loss: 0.37563027633087975
    num_agent_steps_sampled: 189810
    num_agent_steps_trained: 189810
    num_steps_sampled: 189810
    num_steps_trained: 189810
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,95,2506.53,189810,5.7834,10.61,2.32,106.28


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 191808
  custom_metrics: {}
  date: 2021-11-09_04-27-27
  done: false
  episode_len_mean: 105.8
  episode_media: {}
  episode_reward_max: 10.610000000000014
  episode_reward_mean: 5.803300000000018
  episode_reward_min: 2.32000000000002
  episodes_this_iter: 18
  episodes_total: 1764
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.7013385659172422
          entropy_coeff: 0.009999999999999998
          kl: 0.011701392153994931
          policy_loss: -0.04947678102623849
          total_loss: 0.3203045438886398
          vf_explained_var: 0.8659259676933289
          vf_loss: 0.3788962714019276
    num_agent_steps_sampled: 191808
    num_agent_steps_trained: 191808
    num_steps_sampled: 191808
    num_steps_trained: 191808
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,96,2532.38,191808,5.8033,10.61,2.32,105.8




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 193806
  custom_metrics: {}
  date: 2021-11-09_04-28-22
  done: false
  episode_len_mean: 103.55
  episode_media: {}
  episode_reward_max: 10.370000000000015
  episode_reward_mean: 5.587500000000016
  episode_reward_min: 1.94
  episodes_this_iter: 20
  episodes_total: 1784
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.6817511496089754
          entropy_coeff: 0.009999999999999998
          kl: 0.011838777440010504
          policy_loss: -0.02391521182088625
          total_loss: 0.3553296384002481
          vf_explained_var: 0.8725080490112305
          vf_loss: 0.38807118229922793
    num_agent_steps_sampled: 193806
    num_agent_steps_trained: 193806
    num_steps_sampled: 193806
    num_steps_trained: 193806
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,97,2587.49,193806,5.5875,10.37,1.94,103.55




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 195804
  custom_metrics: {}
  date: 2021-11-09_04-29-03
  done: false
  episode_len_mean: 102.88
  episode_media: {}
  episode_reward_max: 10.370000000000015
  episode_reward_mean: 5.584400000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 1805
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.7376719491822379
          entropy_coeff: 0.009999999999999998
          kl: 0.011118168041367447
          policy_loss: -0.021664789886701674
          total_loss: 0.3499283471455177
          vf_explained_var: 0.8765549659729004
          vf_loss: 0.3814650948558535
    num_agent_steps_sampled: 195804
    num_agent_steps_trained: 195804
    num_steps_sampled: 195804
    num_steps_trained: 195804
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,98,2628.71,195804,5.5844,10.37,-0.06,102.88


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 197802
  custom_metrics: {}
  date: 2021-11-09_04-29-27
  done: false
  episode_len_mean: 102.51
  episode_media: {}
  episode_reward_max: 10.370000000000015
  episode_reward_mean: 5.677600000000018
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 1824
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.7940905792372568
          entropy_coeff: 0.009999999999999998
          kl: 0.010194597062131694
          policy_loss: -0.04609637590391295
          total_loss: 0.17223639070455518
          vf_explained_var: 0.9272928833961487
          vf_loss: 0.22939231927905765
    num_agent_steps_sampled: 197802
    num_agent_steps_trained: 197802
    num_steps_sampled: 197802
    num_steps_trained: 197802
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,99,2652.58,197802,5.6776,10.37,-0.06,102.51


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 199800
  custom_metrics: {}
  date: 2021-11-09_04-29-50
  done: false
  episode_len_mean: 103.98
  episode_media: {}
  episode_reward_max: 10.370000000000015
  episode_reward_mean: 5.663300000000017
  episode_reward_min: -0.3199999999999992
  episodes_this_iter: 18
  episodes_total: 1842
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.7253523701713198
          entropy_coeff: 0.009999999999999998
          kl: 0.016400816417014116
          policy_loss: -0.07047839359868141
          total_loss: 0.316181759784619
          vf_explained_var: 0.8961905241012573
          vf_loss: 0.3928431253348078
    num_agent_steps_sampled: 199800
    num_agent_steps_trained: 199800
    num_steps_sampled: 199800
    num_steps_trained: 199800
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,100,2675.95,199800,5.6633,10.37,-0.32,103.98


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 201798
  custom_metrics: {}
  date: 2021-11-09_04-30-14
  done: false
  episode_len_mean: 104.15
  episode_media: {}
  episode_reward_max: 10.290000000000019
  episode_reward_mean: 5.619200000000018
  episode_reward_min: -0.3199999999999992
  episodes_this_iter: 18
  episodes_total: 1860
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.7430792626880465
          entropy_coeff: 0.009999999999999998
          kl: 0.01297571193250926
          policy_loss: -0.023005290489111627
          total_loss: 0.35065120698085855
          vf_explained_var: 0.8785028457641602
          vf_loss: 0.38232868689866295
    num_agent_steps_sampled: 201798
    num_agent_steps_trained: 201798
    num_steps_sampled: 201798
    num_steps_trained: 201798
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,101,2699.53,201798,5.6192,10.29,-0.32,104.15


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 203796
  custom_metrics: {}
  date: 2021-11-09_04-30-36
  done: false
  episode_len_mean: 106.74
  episode_media: {}
  episode_reward_max: 10.290000000000019
  episode_reward_mean: 5.732700000000017
  episode_reward_min: -0.3199999999999992
  episodes_this_iter: 17
  episodes_total: 1877
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.728829170408703
          entropy_coeff: 0.009999999999999998
          kl: 0.011156885568682154
          policy_loss: -0.0391074966107096
          total_loss: 0.27326100822538135
          vf_explained_var: 0.8933712840080261
          vf_loss: 0.3221258989402226
    num_agent_steps_sampled: 203796
    num_agent_steps_trained: 203796
    num_steps_sampled: 203796
    num_steps_trained: 203796
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,102,2721.54,203796,5.7327,10.29,-0.32,106.74


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 205794
  custom_metrics: {}
  date: 2021-11-09_04-30-58
  done: false
  episode_len_mean: 110.32
  episode_media: {}
  episode_reward_max: 9.910000000000018
  episode_reward_mean: 5.74580000000002
  episode_reward_min: -0.3199999999999992
  episodes_this_iter: 17
  episodes_total: 1894
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.7630434121404375
          entropy_coeff: 0.009999999999999998
          kl: 0.013453700226606044
          policy_loss: 0.004165806727749961
          total_loss: 0.4004770782022249
          vf_explained_var: 0.8434374332427979
          vf_loss: 0.4048604586294719
    num_agent_steps_sampled: 205794
    num_agent_steps_trained: 205794
    num_steps_sampled: 205794
    num_steps_trained: 205794
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,103,2743.01,205794,5.7458,9.91,-0.32,110.32


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 207792
  custom_metrics: {}
  date: 2021-11-09_04-31-21
  done: false
  episode_len_mean: 112.23
  episode_media: {}
  episode_reward_max: 9.910000000000018
  episode_reward_mean: 5.577400000000017
  episode_reward_min: -0.3199999999999992
  episodes_this_iter: 18
  episodes_total: 1912
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.7882861898058937
          entropy_coeff: 0.009999999999999998
          kl: 0.011407056415554888
          policy_loss: -0.062009920108885994
          total_loss: 0.2823925139027692
          vf_explained_var: 0.8782331347465515
          vf_loss: 0.3545855290478184
    num_agent_steps_sampled: 207792
    num_agent_steps_trained: 207792
    num_steps_sampled: 207792
    num_steps_trained: 207792
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,104,2766.15,207792,5.5774,9.91,-0.32,112.23


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 209790
  custom_metrics: {}
  date: 2021-11-09_04-31-43
  done: false
  episode_len_mean: 113.67
  episode_media: {}
  episode_reward_max: 10.030000000000028
  episode_reward_mean: 5.714800000000017
  episode_reward_min: -0.3199999999999992
  episodes_this_iter: 16
  episodes_total: 1928
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.6521770982515245
          entropy_coeff: 0.009999999999999998
          kl: 0.012731875693231741
          policy_loss: -0.001122830550940264
          total_loss: 0.34422592981940225
          vf_explained_var: 0.8875991106033325
          vf_loss: 0.35327651500701907
    num_agent_steps_sampled: 209790
    num_agent_steps_trained: 209790
    num_steps_sampled: 209790
    num_steps_trained: 209790
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,105,2788.12,209790,5.7148,10.03,-0.32,113.67


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 211788
  custom_metrics: {}
  date: 2021-11-09_04-32-07
  done: false
  episode_len_mean: 112.81
  episode_media: {}
  episode_reward_max: 10.030000000000028
  episode_reward_mean: 5.781800000000019
  episode_reward_min: 1.2900000000000122
  episodes_this_iter: 20
  episodes_total: 1948
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.692890629314241
          entropy_coeff: 0.009999999999999998
          kl: 0.012378045697009647
          policy_loss: -0.07928474176497687
          total_loss: 0.20809661308746963
          vf_explained_var: 0.8827019333839417
          vf_loss: 0.2959550834127835
    num_agent_steps_sampled: 211788
    num_agent_steps_trained: 211788
    num_steps_sampled: 211788
    num_steps_trained: 211788
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,106,2812.62,211788,5.7818,10.03,1.29,112.81


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 213786
  custom_metrics: {}
  date: 2021-11-09_04-32-31
  done: false
  episode_len_mean: 112.25
  episode_media: {}
  episode_reward_max: 10.030000000000028
  episode_reward_mean: 5.760900000000017
  episode_reward_min: 1.0200000000000014
  episodes_this_iter: 18
  episodes_total: 1966
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.7568746555419195
          entropy_coeff: 0.009999999999999998
          kl: 0.012606849456146105
          policy_loss: -0.02122814515605569
          total_loss: 0.39433583372405595
          vf_explained_var: 0.8603055477142334
          vf_loss: 0.42462310379459745
    num_agent_steps_sampled: 213786
    num_agent_steps_trained: 213786
    num_steps_sampled: 213786
    num_steps_trained: 213786
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,107,2836.47,213786,5.7609,10.03,1.02,112.25


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 215784
  custom_metrics: {}
  date: 2021-11-09_04-32-54
  done: false
  episode_len_mean: 110.99
  episode_media: {}
  episode_reward_max: 10.240000000000018
  episode_reward_mean: 5.832500000000019
  episode_reward_min: 1.0200000000000014
  episodes_this_iter: 19
  episodes_total: 1985
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.7014189572561356
          entropy_coeff: 0.009999999999999998
          kl: 0.012400831499261475
          policy_loss: 0.001818045565769786
          total_loss: 0.38311933176148505
          vf_explained_var: 0.8984407782554626
          vf_loss: 0.38994491483483995
    num_agent_steps_sampled: 215784
    num_agent_steps_trained: 215784
    num_steps_sampled: 215784
    num_steps_trained: 215784
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,108,2859.39,215784,5.8325,10.24,1.02,110.99


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 217782
  custom_metrics: {}
  date: 2021-11-09_04-33-17
  done: false
  episode_len_mean: 111.38
  episode_media: {}
  episode_reward_max: 10.240000000000018
  episode_reward_mean: 5.90920000000002
  episode_reward_min: 1.0200000000000014
  episodes_this_iter: 17
  episodes_total: 2002
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.6444898718879337
          entropy_coeff: 0.009999999999999998
          kl: 0.011629702538667302
          policy_loss: -0.03275430104917004
          total_loss: 0.2729147124281597
          vf_explained_var: 0.908051073551178
          vf_loss: 0.3142638614489919
    num_agent_steps_sampled: 217782
    num_agent_steps_trained: 217782
    num_steps_sampled: 217782
    num_steps_trained: 217782
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,109,2881.82,217782,5.9092,10.24,1.02,111.38


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 219780
  custom_metrics: {}
  date: 2021-11-09_04-33-40
  done: false
  episode_len_mean: 111.98
  episode_media: {}
  episode_reward_max: 10.240000000000018
  episode_reward_mean: 6.012400000000019
  episode_reward_min: 1.0200000000000014
  episodes_this_iter: 17
  episodes_total: 2019
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.7291771372159321
          entropy_coeff: 0.009999999999999998
          kl: 0.01106114201029498
          policy_loss: -0.031507366469928196
          total_loss: 0.31450757966155096
          vf_explained_var: 0.8851600885391235
          vf_loss: 0.3558404469064304
    num_agent_steps_sampled: 219780
    num_agent_steps_trained: 219780
    num_steps_sampled: 219780
    num_steps_trained: 219780
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,110,2904.87,219780,6.0124,10.24,1.02,111.98


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 221778
  custom_metrics: {}
  date: 2021-11-09_04-34-03
  done: false
  episode_len_mean: 111.11
  episode_media: {}
  episode_reward_max: 10.240000000000018
  episode_reward_mean: 5.933600000000018
  episode_reward_min: 1.0200000000000014
  episodes_this_iter: 18
  episodes_total: 2037
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.6209150013469515
          entropy_coeff: 0.009999999999999998
          kl: 0.01353330660090034
          policy_loss: -0.03283551459511121
          total_loss: 0.2539057708744492
          vf_explained_var: 0.8830348253250122
          vf_loss: 0.2938154512218067
    num_agent_steps_sampled: 221778
    num_agent_steps_trained: 221778
    num_steps_sampled: 221778
    num_steps_trained: 221778
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,111,2928.26,221778,5.9336,10.24,1.02,111.11


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 223776
  custom_metrics: {}
  date: 2021-11-09_04-34-27
  done: false
  episode_len_mean: 111.11
  episode_media: {}
  episode_reward_max: 10.240000000000018
  episode_reward_mean: 5.757500000000021
  episode_reward_min: 1.0200000000000014
  episodes_this_iter: 18
  episodes_total: 2055
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.675027696859269
          entropy_coeff: 0.009999999999999998
          kl: 0.009876724591046557
          policy_loss: -0.023586970425787425
          total_loss: 0.19875422290393285
          vf_explained_var: 0.9104951620101929
          vf_loss: 0.2324246804983843
    num_agent_steps_sampled: 223776
    num_agent_steps_trained: 223776
    num_steps_sampled: 223776
    num_steps_trained: 223776
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,112,2952.52,223776,5.7575,10.24,1.02,111.11


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 225774
  custom_metrics: {}
  date: 2021-11-09_04-34-50
  done: false
  episode_len_mean: 112.46
  episode_media: {}
  episode_reward_max: 9.75000000000002
  episode_reward_mean: 5.829000000000019
  episode_reward_min: 2.480000000000016
  episodes_this_iter: 18
  episodes_total: 2073
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.7224042795953296
          entropy_coeff: 0.009999999999999998
          kl: 0.011392139519817218
          policy_loss: -0.006491103963482948
          total_loss: 0.29376328775570504
          vf_explained_var: 0.894023060798645
          vf_loss: 0.3097887432291394
    num_agent_steps_sampled: 225774
    num_agent_steps_trained: 225774
    num_steps_sampled: 225774
    num_steps_trained: 225774
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,113,2975.23,225774,5.829,9.75,2.48,112.46


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 227772
  custom_metrics: {}
  date: 2021-11-09_04-35-14
  done: false
  episode_len_mean: 113.42
  episode_media: {}
  episode_reward_max: 9.760000000000026
  episode_reward_mean: 5.713600000000021
  episode_reward_min: 2.22000000000002
  episodes_this_iter: 17
  episodes_total: 2090
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.7221164533070155
          entropy_coeff: 0.009999999999999998
          kl: 0.011439039329343603
          policy_loss: -0.029452141090517952
          total_loss: 0.2688049755014834
          vf_explained_var: 0.891615092754364
          vf_loss: 0.30775693229266576
    num_agent_steps_sampled: 227772
    num_agent_steps_trained: 227772
    num_steps_sampled: 227772
    num_steps_trained: 227772
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,114,2998.85,227772,5.7136,9.76,2.22,113.42


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 229770
  custom_metrics: {}
  date: 2021-11-09_04-35-38
  done: false
  episode_len_mean: 111.69
  episode_media: {}
  episode_reward_max: 9.760000000000026
  episode_reward_mean: 5.7153000000000205
  episode_reward_min: 2.22000000000002
  episodes_this_iter: 20
  episodes_total: 2110
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.6616338037309193
          entropy_coeff: 0.009999999999999998
          kl: 0.015496224081533137
          policy_loss: -0.02484620378485748
          total_loss: 0.29866184985176436
          vf_explained_var: 0.8944061398506165
          vf_loss: 0.3296644404885315
    num_agent_steps_sampled: 229770
    num_agent_steps_trained: 229770
    num_steps_sampled: 229770
    num_steps_trained: 229770
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,115,3022.66,229770,5.7153,9.76,2.22,111.69




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 231768
  custom_metrics: {}
  date: 2021-11-09_04-36-17
  done: false
  episode_len_mean: 110.43
  episode_media: {}
  episode_reward_max: 9.840000000000025
  episode_reward_mean: 5.629800000000021
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 16
  episodes_total: 2126
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.5931826018151782
          entropy_coeff: 0.009999999999999998
          kl: 0.01073646343896984
          policy_loss: -0.04325212715637116
          total_loss: 0.1945485784760898
          vf_explained_var: 0.9122964143753052
          vf_loss: 0.2464854172652676
    num_agent_steps_sampled: 231768
    num_agent_steps_trained: 231768
    num_steps_sampled: 231768
    num_steps_trained: 231768
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,116,3062.52,231768,5.6298,9.84,-0.06,110.43




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 233766
  custom_metrics: {}
  date: 2021-11-09_04-36-55
  done: false
  episode_len_mean: 110.66
  episode_media: {}
  episode_reward_max: 12.06000000000002
  episode_reward_mean: 5.971700000000019
  episode_reward_min: -0.07
  episodes_this_iter: 20
  episodes_total: 2146
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.7166362240200952
          entropy_coeff: 0.009999999999999998
          kl: 0.010042062710551507
          policy_loss: -0.030760999627056577
          total_loss: 0.27014965017636616
          vf_explained_var: 0.8949511647224426
          vf_loss: 0.3112986220845154
    num_agent_steps_sampled: 233766
    num_agent_steps_trained: 233766
    num_steps_sampled: 233766
    num_steps_trained: 233766
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,117,3100.3,233766,5.9717,12.06,-0.07,110.66




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 235764
  custom_metrics: {}
  date: 2021-11-09_04-37-35
  done: false
  episode_len_mean: 110.87
  episode_media: {}
  episode_reward_max: 12.06000000000002
  episode_reward_mean: 6.05940000000002
  episode_reward_min: -0.07
  episodes_this_iter: 17
  episodes_total: 2163
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.664086463337853
          entropy_coeff: 0.009999999999999998
          kl: 0.011425042384829472
          policy_loss: -0.05077826412660735
          total_loss: 0.22980479655698652
          vf_explained_var: 0.9253833293914795
          vf_loss: 0.28951201825624423
    num_agent_steps_sampled: 235764
    num_agent_steps_trained: 235764
    num_steps_sampled: 235764
    num_steps_trained: 235764
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,118,3139.82,235764,6.0594,12.06,-0.07,110.87


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 237762
  custom_metrics: {}
  date: 2021-11-09_04-37-58
  done: false
  episode_len_mean: 110.66
  episode_media: {}
  episode_reward_max: 12.06000000000002
  episode_reward_mean: 6.10040000000002
  episode_reward_min: -0.07
  episodes_this_iter: 17
  episodes_total: 2180
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.664843033041273
          entropy_coeff: 0.009999999999999998
          kl: 0.012739536686755197
          policy_loss: -0.008857807091304234
          total_loss: 0.27942252815479324
          vf_explained_var: 0.9163947701454163
          vf_loss: 0.2963295781896228
    num_agent_steps_sampled: 237762
    num_agent_steps_trained: 237762
    num_steps_sampled: 237762
    num_steps_trained: 237762
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,119,3163.45,237762,6.1004,12.06,-0.07,110.66


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 239760
  custom_metrics: {}
  date: 2021-11-09_04-38-22
  done: false
  episode_len_mean: 111.09
  episode_media: {}
  episode_reward_max: 12.06000000000002
  episode_reward_mean: 6.1445000000000185
  episode_reward_min: -0.07
  episodes_this_iter: 18
  episodes_total: 2198
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.7542475768498011
          entropy_coeff: 0.009999999999999998
          kl: 0.010883539351073406
          policy_loss: -0.04308775858510108
          total_loss: 0.2270581446942829
          vf_explained_var: 0.9218264818191528
          vf_loss: 0.2803419894760563
    num_agent_steps_sampled: 239760
    num_agent_steps_trained: 239760
    num_steps_sampled: 239760
    num_steps_trained: 239760
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,120,3187,239760,6.1445,12.06,-0.07,111.09


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 241758
  custom_metrics: {}
  date: 2021-11-09_04-38-48
  done: false
  episode_len_mean: 110.23
  episode_media: {}
  episode_reward_max: 12.06000000000002
  episode_reward_mean: 6.130700000000021
  episode_reward_min: -0.07
  episodes_this_iter: 19
  episodes_total: 2217
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.6749953854651678
          entropy_coeff: 0.009999999999999998
          kl: 0.009406338315851602
          policy_loss: -0.023458876230177426
          total_loss: 0.21934502690675714
          vf_explained_var: 0.9306496977806091
          vf_loss: 0.2532045790836925
    num_agent_steps_sampled: 241758
    num_agent_steps_trained: 241758
    num_steps_sampled: 241758
    num_steps_trained: 241758
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,121,3213.25,241758,6.1307,12.06,-0.07,110.23


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 243756
  custom_metrics: {}
  date: 2021-11-09_04-39-13
  done: false
  episode_len_mean: 110.67
  episode_media: {}
  episode_reward_max: 10.470000000000018
  episode_reward_mean: 6.202500000000019
  episode_reward_min: 2.3900000000000197
  episodes_this_iter: 19
  episodes_total: 2236
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.6748486445063637
          entropy_coeff: 0.009999999999999998
          kl: 0.014342015661553409
          policy_loss: -0.0882084376311728
          total_loss: 0.3170673911459744
          vf_explained_var: 0.8788401484489441
          vf_loss: 0.412343450316361
    num_agent_steps_sampled: 243756
    num_agent_steps_trained: 243756
    num_steps_sampled: 243756
    num_steps_trained: 243756
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,122,3237.78,243756,6.2025,10.47,2.39,110.67


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 245754
  custom_metrics: {}
  date: 2021-11-09_04-39-38
  done: false
  episode_len_mean: 110.09
  episode_media: {}
  episode_reward_max: 12.110000000000019
  episode_reward_mean: 6.194800000000019
  episode_reward_min: 2.3900000000000197
  episodes_this_iter: 19
  episodes_total: 2255
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.6480806708335876
          entropy_coeff: 0.009999999999999998
          kl: 0.010607230673563735
          policy_loss: 0.0034852534177757443
          total_loss: 0.2740261688163238
          vf_explained_var: 0.9274261593818665
          vf_loss: 0.27986184191845714
    num_agent_steps_sampled: 245754
    num_agent_steps_trained: 245754
    num_steps_sampled: 245754
    num_steps_trained: 245754
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,123,3262.44,245754,6.1948,12.11,2.39,110.09


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 247752
  custom_metrics: {}
  date: 2021-11-09_04-40-01
  done: false
  episode_len_mean: 109.46
  episode_media: {}
  episode_reward_max: 12.110000000000019
  episode_reward_mean: 6.244700000000019
  episode_reward_min: 2.5000000000000155
  episodes_this_iter: 18
  episodes_total: 2273
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.6392156629335313
          entropy_coeff: 0.009999999999999998
          kl: 0.012444946001918672
          policy_loss: -0.027148270429599854
          total_loss: 0.2324360757533993
          vf_explained_var: 0.922684907913208
          vf_loss: 0.2675761651425135
    num_agent_steps_sampled: 247752
    num_agent_steps_trained: 247752
    num_steps_sampled: 247752
    num_steps_trained: 247752
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,124,3285.56,247752,6.2447,12.11,2.5,109.46


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 249750
  custom_metrics: {}
  date: 2021-11-09_04-40-26
  done: false
  episode_len_mean: 107.16
  episode_media: {}
  episode_reward_max: 12.110000000000019
  episode_reward_mean: 6.38100000000002
  episode_reward_min: 2.5000000000000155
  episodes_this_iter: 20
  episodes_total: 2293
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.6379004915555317
          entropy_coeff: 0.009999999999999998
          kl: 0.009950138933594145
          policy_loss: -0.08549079114482516
          total_loss: 0.10611375980079174
          vf_explained_var: 0.9546718001365662
          vf_loss: 0.20126720991517816
    num_agent_steps_sampled: 249750
    num_agent_steps_trained: 249750
    num_steps_sampled: 249750
    num_steps_trained: 249750
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,125,3310.35,249750,6.381,12.11,2.5,107.16


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 251748
  custom_metrics: {}
  date: 2021-11-09_04-40-50
  done: false
  episode_len_mean: 106.21
  episode_media: {}
  episode_reward_max: 12.110000000000019
  episode_reward_mean: 6.412800000000019
  episode_reward_min: 2.5000000000000155
  episodes_this_iter: 18
  episodes_total: 2311
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.6765209896223885
          entropy_coeff: 0.009999999999999998
          kl: 0.009998970260913512
          policy_loss: -0.08549230473027343
          total_loss: 0.1390593481094887
          vf_explained_var: 0.9431438446044922
          vf_loss: 0.23456755688502676
    num_agent_steps_sampled: 251748
    num_agent_steps_trained: 251748
    num_steps_sampled: 251748
    num_steps_trained: 251748
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,126,3334.67,251748,6.4128,12.11,2.5,106.21


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 253746
  custom_metrics: {}
  date: 2021-11-09_04-41-14
  done: false
  episode_len_mean: 106.6
  episode_media: {}
  episode_reward_max: 12.110000000000019
  episode_reward_mean: 6.56550000000002
  episode_reward_min: 2.7100000000000173
  episodes_this_iter: 19
  episodes_total: 2330
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.4835398106347948
          entropy_coeff: 0.009999999999999998
          kl: 0.010618977751409174
          policy_loss: -0.06201386382537229
          total_loss: 0.17382174048217988
          vf_explained_var: 0.9432286024093628
          vf_loss: 0.24350319419588362
    num_agent_steps_sampled: 253746
    num_agent_steps_trained: 253746
    num_steps_sampled: 253746
    num_steps_trained: 253746
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,127,3358.65,253746,6.5655,12.11,2.71,106.6


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 255744
  custom_metrics: {}
  date: 2021-11-09_04-41-39
  done: false
  episode_len_mean: 105.25
  episode_media: {}
  episode_reward_max: 11.92000000000002
  episode_reward_mean: 6.430900000000017
  episode_reward_min: 2.7100000000000173
  episodes_this_iter: 20
  episodes_total: 2350
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.6373967692965552
          entropy_coeff: 0.009999999999999998
          kl: 0.008947799064855854
          policy_loss: -0.0745153766657625
          total_loss: 0.2046789320984057
          vf_explained_var: 0.9397814273834229
          vf_loss: 0.2895285107550167
    num_agent_steps_sampled: 255744
    num_agent_steps_trained: 255744
    num_steps_sampled: 255744
    num_steps_trained: 255744
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,128,3383.44,255744,6.4309,11.92,2.71,105.25


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 257742
  custom_metrics: {}
  date: 2021-11-09_04-42-03
  done: false
  episode_len_mean: 105.3
  episode_media: {}
  episode_reward_max: 11.92000000000002
  episode_reward_mean: 6.593900000000019
  episode_reward_min: 2.600000000000012
  episodes_this_iter: 17
  episodes_total: 2367
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.5537732850937616
          entropy_coeff: 0.009999999999999998
          kl: 0.01589510115273053
          policy_loss: -0.017388560995459556
          total_loss: 0.30454085355713256
          vf_explained_var: 0.9311486482620239
          vf_loss: 0.3267379535096032
    num_agent_steps_sampled: 257742
    num_agent_steps_trained: 257742
    num_steps_sampled: 257742
    num_steps_trained: 257742
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,129,3407.31,257742,6.5939,11.92,2.6,105.3


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 259740
  custom_metrics: {}
  date: 2021-11-09_04-42-28
  done: false
  episode_len_mean: 104.15
  episode_media: {}
  episode_reward_max: 11.92000000000002
  episode_reward_mean: 6.571500000000018
  episode_reward_min: 2.3700000000000183
  episodes_this_iter: 21
  episodes_total: 2388
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.4099393827574593
          entropy_coeff: 0.009999999999999998
          kl: 0.00960917253996912
          policy_loss: -0.016588220931589605
          total_loss: 0.2515775986991468
          vf_explained_var: 0.9352595806121826
          vf_loss: 0.27577902192161197
    num_agent_steps_sampled: 259740
    num_agent_steps_trained: 259740
    num_steps_sampled: 259740
    num_steps_trained: 259740
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,130,3432.46,259740,6.5715,11.92,2.37,104.15


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 261738
  custom_metrics: {}
  date: 2021-11-09_04-42-51
  done: false
  episode_len_mean: 105.21
  episode_media: {}
  episode_reward_max: 12.020000000000024
  episode_reward_mean: 6.719200000000018
  episode_reward_min: 2.3700000000000183
  episodes_this_iter: 18
  episodes_total: 2406
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.5791828933216276
          entropy_coeff: 0.009999999999999998
          kl: 0.01178522383696789
          policy_loss: -0.025503000625897023
          total_loss: 0.2731506501280126
          vf_explained_var: 0.9313633441925049
          vf_loss: 0.3064904535810153
    num_agent_steps_sampled: 261738
    num_agent_steps_trained: 261738
    num_steps_sampled: 261738
    num_steps_trained: 261738
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,131,3455.42,261738,6.7192,12.02,2.37,105.21


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 263736
  custom_metrics: {}
  date: 2021-11-09_04-43-16
  done: false
  episode_len_mean: 105.75
  episode_media: {}
  episode_reward_max: 12.020000000000024
  episode_reward_mean: 6.5782000000000185
  episode_reward_min: 2.3700000000000183
  episodes_this_iter: 19
  episodes_total: 2425
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.6018564723786854
          entropy_coeff: 0.009999999999999998
          kl: 0.010899292151580534
          policy_loss: 0.0029793429055384227
          total_loss: 0.2880108743373837
          vf_explained_var: 0.9155308604240417
          vf_loss: 0.29369307316484905
    num_agent_steps_sampled: 263736
    num_agent_steps_trained: 263736
    num_steps_sampled: 263736
    num_steps_trained: 263736
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,132,3480.2,263736,6.5782,12.02,2.37,105.75


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 265734
  custom_metrics: {}
  date: 2021-11-09_04-43-41
  done: false
  episode_len_mean: 105.8
  episode_media: {}
  episode_reward_max: 12.020000000000024
  episode_reward_mean: 6.502800000000018
  episode_reward_min: 2.3700000000000183
  episodes_this_iter: 18
  episodes_total: 2443
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.6483252763748169
          entropy_coeff: 0.009999999999999998
          kl: 0.010565248027606475
          policy_loss: -0.043630164302885535
          total_loss: 0.11233030696887346
          vf_explained_var: 0.9452325105667114
          vf_loss: 0.16531217843294144
    num_agent_steps_sampled: 265734
    num_agent_steps_trained: 265734
    num_steps_sampled: 265734
    num_steps_trained: 265734
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,133,3505.18,265734,6.5028,12.02,2.37,105.8


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 267732
  custom_metrics: {}
  date: 2021-11-09_04-44-05
  done: false
  episode_len_mean: 105.53
  episode_media: {}
  episode_reward_max: 13.850000000000025
  episode_reward_mean: 6.583200000000018
  episode_reward_min: 2.3700000000000183
  episodes_this_iter: 18
  episodes_total: 2461
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.6074210774330866
          entropy_coeff: 0.009999999999999998
          kl: 0.010317705012833064
          policy_loss: -0.09176220712030218
          total_loss: 0.1535129691562837
          vf_explained_var: 0.9475312829017639
          vf_loss: 0.25438493562950976
    num_agent_steps_sampled: 267732
    num_agent_steps_trained: 267732
    num_steps_sampled: 267732
    num_steps_trained: 267732
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,134,3530.01,267732,6.5832,13.85,2.37,105.53




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 269730
  custom_metrics: {}
  date: 2021-11-09_04-44-44
  done: false
  episode_len_mean: 105.58
  episode_media: {}
  episode_reward_max: 13.850000000000025
  episode_reward_mean: 6.487300000000021
  episode_reward_min: -0.02
  episodes_this_iter: 21
  episodes_total: 2482
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.528582811923254
          entropy_coeff: 0.009999999999999998
          kl: 0.009340113123118117
          policy_loss: -0.05049288045792352
          total_loss: 0.2294970211883386
          vf_explained_var: 0.92828369140625
          vf_loss: 0.28897115236946513
    num_agent_steps_sampled: 269730
    num_agent_steps_trained: 269730
    num_steps_sampled: 269730
    num_steps_trained: 269730
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,135,3568.44,269730,6.4873,13.85,-0.02,105.58


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 271728
  custom_metrics: {}
  date: 2021-11-09_04-45-08
  done: false
  episode_len_mean: 107.44
  episode_media: {}
  episode_reward_max: 13.850000000000025
  episode_reward_mean: 6.4623000000000195
  episode_reward_min: -0.02
  episodes_this_iter: 16
  episodes_total: 2498
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.6327531638599577
          entropy_coeff: 0.009999999999999998
          kl: 0.011978759255282842
          policy_loss: -0.0366647193474429
          total_loss: 0.24241475103689092
          vf_explained_var: 0.9360076785087585
          vf_loss: 0.2873213379865601
    num_agent_steps_sampled: 271728
    num_agent_steps_trained: 271728
    num_steps_sampled: 271728
    num_steps_trained: 271728
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,136,3592.15,271728,6.4623,13.85,-0.02,107.44




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 273726
  custom_metrics: {}
  date: 2021-11-09_04-45-47
  done: false
  episode_len_mean: 107.68
  episode_media: {}
  episode_reward_max: 13.850000000000025
  episode_reward_mean: 6.319900000000019
  episode_reward_min: -0.15000000000000002
  episodes_this_iter: 19
  episodes_total: 2517
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.589680898757208
          entropy_coeff: 0.009999999999999998
          kl: 0.013997469003016995
          policy_loss: -0.004236967400425957
          total_loss: 0.44798554962589626
          vf_explained_var: 0.8553228378295898
          vf_loss: 0.4586710341629528
    num_agent_steps_sampled: 273726
    num_agent_steps_trained: 273726
    num_steps_sampled: 273726
    num_steps_trained: 273726
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,137,3631.31,273726,6.3199,13.85,-0.15,107.68


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 275724
  custom_metrics: {}
  date: 2021-11-09_04-46-11
  done: false
  episode_len_mean: 109.74
  episode_media: {}
  episode_reward_max: 13.850000000000025
  episode_reward_mean: 6.455400000000019
  episode_reward_min: -0.15000000000000002
  episodes_this_iter: 18
  episodes_total: 2535
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.5464099668321156
          entropy_coeff: 0.009999999999999998
          kl: 0.010788390242636564
          policy_loss: -0.060076482735929033
          total_loss: 0.17985419176873707
          vf_explained_var: 0.9418133497238159
          vf_loss: 0.24811261086946443
    num_agent_steps_sampled: 275724
    num_agent_steps_trained: 275724
    num_steps_sampled: 275724
    num_steps_trained: 275724
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,138,3655.16,275724,6.4554,13.85,-0.15,109.74


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 277722
  custom_metrics: {}
  date: 2021-11-09_04-46-35
  done: false
  episode_len_mean: 109.4
  episode_media: {}
  episode_reward_max: 11.950000000000026
  episode_reward_mean: 6.2481000000000195
  episode_reward_min: -0.15000000000000002
  episodes_this_iter: 18
  episodes_total: 2553
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.6186148524284363
          entropy_coeff: 0.009999999999999998
          kl: 0.011772751394535673
          policy_loss: 0.03913448364252136
          total_loss: 0.26707003827073744
          vf_explained_var: 0.9092108011245728
          vf_loss: 0.23617509738320397
    num_agent_steps_sampled: 277722
    num_agent_steps_trained: 277722
    num_steps_sampled: 277722
    num_steps_trained: 277722
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,139,3679.17,277722,6.2481,11.95,-0.15,109.4


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 279720
  custom_metrics: {}
  date: 2021-11-09_04-46-58
  done: false
  episode_len_mean: 111.07
  episode_media: {}
  episode_reward_max: 12.18000000000002
  episode_reward_mean: 6.35570000000002
  episode_reward_min: -0.15000000000000002
  episodes_this_iter: 18
  episodes_total: 2571
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.5328132816723414
          entropy_coeff: 0.009999999999999998
          kl: 0.012836415671089606
          policy_loss: -0.0632634325396447
          total_loss: 0.21266306408104443
          vf_explained_var: 0.9207316040992737
          vf_loss: 0.2825900483699072
    num_agent_steps_sampled: 279720
    num_agent_steps_trained: 279720
    num_steps_sampled: 279720
    num_steps_trained: 279720
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,140,3702.1,279720,6.3557,12.18,-0.15,111.07


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 281718
  custom_metrics: {}
  date: 2021-11-09_04-47-20
  done: false
  episode_len_mean: 112.32
  episode_media: {}
  episode_reward_max: 12.18000000000002
  episode_reward_mean: 6.37900000000002
  episode_reward_min: -0.15000000000000002
  episodes_this_iter: 18
  episodes_total: 2589
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.4666664418720063
          entropy_coeff: 0.009999999999999998
          kl: 0.010579766571073568
          policy_loss: -0.00625536832071486
          total_loss: 0.2483145378175236
          vf_explained_var: 0.9307595491409302
          vf_loss: 0.2620952281923521
    num_agent_steps_sampled: 281718
    num_agent_steps_trained: 281718
    num_steps_sampled: 281718
    num_steps_trained: 281718
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,141,3724.31,281718,6.379,12.18,-0.15,112.32


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 283716
  custom_metrics: {}
  date: 2021-11-09_04-47-43
  done: false
  episode_len_mean: 112.26
  episode_media: {}
  episode_reward_max: 12.18000000000002
  episode_reward_mean: 6.545200000000022
  episode_reward_min: -0.15000000000000002
  episodes_this_iter: 16
  episodes_total: 2605
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.561122113182431
          entropy_coeff: 0.009999999999999998
          kl: 0.010635985263736556
          policy_loss: -0.016357763963086264
          total_loss: 0.19892723732406184
          vf_explained_var: 0.9422924518585205
          vf_loss: 0.2237169314353239
    num_agent_steps_sampled: 283716
    num_agent_steps_trained: 283716
    num_steps_sampled: 283716
    num_steps_trained: 283716
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,142,3747.68,283716,6.5452,12.18,-0.15,112.26


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 285714
  custom_metrics: {}
  date: 2021-11-09_04-48-08
  done: false
  episode_len_mean: 111.15
  episode_media: {}
  episode_reward_max: 12.18000000000002
  episode_reward_mean: 6.853200000000019
  episode_reward_min: 2.780000000000011
  episodes_this_iter: 19
  episodes_total: 2624
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.5376857485089983
          entropy_coeff: 0.009999999999999998
          kl: 0.009116775474520087
          policy_loss: -0.03998335867765404
          total_loss: 0.2044400823435613
          vf_explained_var: 0.9428350329399109
          vf_loss: 0.2536464748638017
    num_agent_steps_sampled: 285714
    num_agent_steps_trained: 285714
    num_steps_sampled: 285714
    num_steps_trained: 285714
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,143,3772.55,285714,6.8532,12.18,2.78,111.15


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 287712
  custom_metrics: {}
  date: 2021-11-09_04-48-32
  done: false
  episode_len_mean: 110.69
  episode_media: {}
  episode_reward_max: 12.18000000000002
  episode_reward_mean: 7.136500000000019
  episode_reward_min: 2.780000000000011
  episodes_this_iter: 19
  episodes_total: 2643
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.5004728754361472
          entropy_coeff: 0.009999999999999998
          kl: 0.011435189022505492
          policy_loss: -0.020788481352584702
          total_loss: 0.2656368977079789
          vf_explained_var: 0.9391801953315735
          vf_loss: 0.293711357599213
    num_agent_steps_sampled: 287712
    num_agent_steps_trained: 287712
    num_steps_sampled: 287712
    num_steps_trained: 287712
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,144,3796.32,287712,7.1365,12.18,2.78,110.69


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 289710
  custom_metrics: {}
  date: 2021-11-09_04-48-55
  done: false
  episode_len_mean: 112.23
  episode_media: {}
  episode_reward_max: 12.18000000000002
  episode_reward_mean: 7.26650000000002
  episode_reward_min: 2.780000000000011
  episodes_this_iter: 17
  episodes_total: 2660
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.532057000909533
          entropy_coeff: 0.009999999999999998
          kl: 0.016194177552740364
          policy_loss: -0.02426821037772156
          total_loss: 0.4228643010592177
          vf_explained_var: 0.8915917277336121
          vf_loss: 0.45152201194848335
    num_agent_steps_sampled: 289710
    num_agent_steps_trained: 289710
    num_steps_sampled: 289710
    num_steps_trained: 289710
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,145,3819,289710,7.2665,12.18,2.78,112.23


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 291708
  custom_metrics: {}
  date: 2021-11-09_04-49-19
  done: false
  episode_len_mean: 113.02
  episode_media: {}
  episode_reward_max: 12.040000000000022
  episode_reward_mean: 7.25010000000002
  episode_reward_min: 2.780000000000011
  episodes_this_iter: 16
  episodes_total: 2676
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.521150419257936
          entropy_coeff: 0.009999999999999998
          kl: 0.013698381829426847
          policy_loss: -0.05067432356022653
          total_loss: 0.3352051391621076
          vf_explained_var: 0.8979358673095703
          vf_loss: 0.3918445561613355
    num_agent_steps_sampled: 291708
    num_agent_steps_trained: 291708
    num_steps_sampled: 291708
    num_steps_trained: 291708
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,146,3843.57,291708,7.2501,12.04,2.78,113.02


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 293706
  custom_metrics: {}
  date: 2021-11-09_04-49-44
  done: false
  episode_len_mean: 111.11
  episode_media: {}
  episode_reward_max: 12.040000000000022
  episode_reward_mean: 7.236800000000019
  episode_reward_min: 2.780000000000011
  episodes_this_iter: 19
  episodes_total: 2695
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.5512729099818638
          entropy_coeff: 0.009999999999999998
          kl: 0.010245937050272274
          policy_loss: 0.0067382377971495905
          total_loss: 0.3661775347732362
          vf_explained_var: 0.8908097147941589
          vf_loss: 0.3680360197311356
    num_agent_steps_sampled: 293706
    num_agent_steps_trained: 293706
    num_steps_sampled: 293706
    num_steps_trained: 293706
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,147,3868.02,293706,7.2368,12.04,2.78,111.11


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 295704
  custom_metrics: {}
  date: 2021-11-09_04-50-07
  done: false
  episode_len_mean: 112.04
  episode_media: {}
  episode_reward_max: 12.20000000000002
  episode_reward_mean: 7.098500000000021
  episode_reward_min: 2.4600000000000177
  episodes_this_iter: 18
  episodes_total: 2713
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.5850680657795497
          entropy_coeff: 0.009999999999999998
          kl: 0.008900745059193176
          policy_loss: -0.023339697452528136
          total_loss: 0.2256933876446315
          vf_explained_var: 0.9494208693504333
          vf_loss: 0.25887576472901164
    num_agent_steps_sampled: 295704
    num_agent_steps_trained: 295704
    num_steps_sampled: 295704
    num_steps_trained: 295704
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,148,3891.11,295704,7.0985,12.2,2.46,112.04


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 297702
  custom_metrics: {}
  date: 2021-11-09_04-50-31
  done: false
  episode_len_mean: 113.12
  episode_media: {}
  episode_reward_max: 13.440000000000026
  episode_reward_mean: 7.14150000000002
  episode_reward_min: 2.4600000000000177
  episodes_this_iter: 17
  episodes_total: 2730
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.516264678183056
          entropy_coeff: 0.009999999999999998
          kl: 0.010684777631725198
          policy_loss: -0.031145275561582475
          total_loss: 0.1359033298279558
          vf_explained_var: 0.9624699354171753
          vf_loss: 0.17499902755731628
    num_agent_steps_sampled: 297702
    num_agent_steps_trained: 297702
    num_steps_sampled: 297702
    num_steps_trained: 297702
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,149,3915.15,297702,7.1415,13.44,2.46,113.12


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 299700
  custom_metrics: {}
  date: 2021-11-09_04-50-54
  done: false
  episode_len_mean: 113.92
  episode_media: {}
  episode_reward_max: 13.440000000000026
  episode_reward_mean: 6.934200000000021
  episode_reward_min: 2.4600000000000177
  episodes_this_iter: 17
  episodes_total: 2747
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.4821203169368562
          entropy_coeff: 0.009999999999999998
          kl: 0.018334382429543643
          policy_loss: 0.006048522321950822
          total_loss: 0.4832700554902355
          vf_explained_var: 0.8905865550041199
          vf_loss: 0.479667027897778
    num_agent_steps_sampled: 299700
    num_agent_steps_trained: 299700
    num_steps_sampled: 299700
    num_steps_trained: 299700
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,150,3937.59,299700,6.9342,13.44,2.46,113.92


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 301698
  custom_metrics: {}
  date: 2021-11-09_04-51-17
  done: false
  episode_len_mean: 113.67
  episode_media: {}
  episode_reward_max: 13.440000000000026
  episode_reward_mean: 6.9538000000000215
  episode_reward_min: 2.4600000000000177
  episodes_this_iter: 17
  episodes_total: 2764
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.6322251546950568
          entropy_coeff: 0.009999999999999998
          kl: 0.010891701158161425
          policy_loss: 0.0019428902793498265
          total_loss: 0.2993443801999092
          vf_explained_var: 0.9232727289199829
          vf_loss: 0.30637184569523446
    num_agent_steps_sampled: 301698
    num_agent_steps_trained: 301698
    num_steps_sampled: 301698
    num_steps_trained: 301698
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,151,3961.22,301698,6.9538,13.44,2.46,113.67


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 303696
  custom_metrics: {}
  date: 2021-11-09_04-51-42
  done: false
  episode_len_mean: 113.09
  episode_media: {}
  episode_reward_max: 13.440000000000026
  episode_reward_mean: 7.052800000000018
  episode_reward_min: 2.4600000000000177
  episodes_this_iter: 20
  episodes_total: 2784
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.5269176046053567
          entropy_coeff: 0.009999999999999998
          kl: 0.008633343899449166
          policy_loss: -0.010893575563317253
          total_loss: 0.18465427277343613
          vf_explained_var: 0.9481016397476196
          vf_loss: 0.2049895176930087
    num_agent_steps_sampled: 303696
    num_agent_steps_trained: 303696
    num_steps_sampled: 303696
    num_steps_trained: 303696
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,152,3985.6,303696,7.0528,13.44,2.46,113.09


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 305694
  custom_metrics: {}
  date: 2021-11-09_04-52-05
  done: false
  episode_len_mean: 113.12
  episode_media: {}
  episode_reward_max: 13.440000000000026
  episode_reward_mean: 7.1113000000000195
  episode_reward_min: -1.1200000000000008
  episodes_this_iter: 17
  episodes_total: 2801
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.4868015300659907
          entropy_coeff: 0.009999999999999998
          kl: 0.020830810458364987
          policy_loss: 0.016539094508403823
          total_loss: 0.3756082451081879
          vf_explained_var: 0.938480794429779
          vf_loss: 0.3598763670949709
    num_agent_steps_sampled: 305694
    num_agent_steps_trained: 305694
    num_steps_sampled: 305694
    num_steps_trained: 305694
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,153,4008.83,305694,7.1113,13.44,-1.12,113.12


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 307692
  custom_metrics: {}
  date: 2021-11-09_04-52-28
  done: false
  episode_len_mean: 112.94
  episode_media: {}
  episode_reward_max: 14.010000000000023
  episode_reward_mean: 7.21200000000002
  episode_reward_min: -1.1200000000000008
  episodes_this_iter: 17
  episodes_total: 2818
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.5523657764707293
          entropy_coeff: 0.009999999999999998
          kl: 0.006987610134272464
          policy_loss: -0.05021489777025722
          total_loss: 0.18390590433208714
          vf_explained_var: 0.9327701926231384
          vf_loss: 0.24256950308169636
    num_agent_steps_sampled: 307692
    num_agent_steps_trained: 307692
    num_steps_sampled: 307692
    num_steps_trained: 307692
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,154,4031.52,307692,7.212,14.01,-1.12,112.94


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 309690
  custom_metrics: {}
  date: 2021-11-09_04-52-51
  done: false
  episode_len_mean: 113.19
  episode_media: {}
  episode_reward_max: 14.010000000000023
  episode_reward_mean: 7.0423000000000195
  episode_reward_min: -1.1200000000000008
  episodes_this_iter: 17
  episodes_total: 2835
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.503391959553673
          entropy_coeff: 0.009999999999999998
          kl: 0.009029799883548325
          policy_loss: -0.001989179015869186
          total_loss: 0.18925794881014596
          vf_explained_var: 0.9331493377685547
          vf_loss: 0.19713837486647423
    num_agent_steps_sampled: 309690
    num_agent_steps_trained: 309690
    num_steps_sampled: 309690
    num_steps_trained: 309690
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,155,4054.99,309690,7.0423,14.01,-1.12,113.19




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 311688
  custom_metrics: {}
  date: 2021-11-09_04-53-40
  done: false
  episode_len_mean: 111.89
  episode_media: {}
  episode_reward_max: 14.010000000000023
  episode_reward_mean: 7.1597000000000195
  episode_reward_min: -1.1200000000000008
  episodes_this_iter: 20
  episodes_total: 2855
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.4992290275437492
          entropy_coeff: 0.009999999999999998
          kl: 0.01137758487694388
          policy_loss: -0.03889257393422581
          total_loss: 0.4662999544647478
          vf_explained_var: 0.8952781558036804
          vf_loss: 0.5086650129230249
    num_agent_steps_sampled: 311688
    num_agent_steps_trained: 311688
    num_steps_sampled: 311688
    num_steps_trained: 311688
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,156,4103.75,311688,7.1597,14.01,-1.12,111.89




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 313686
  custom_metrics: {}
  date: 2021-11-09_04-54-17
  done: false
  episode_len_mean: 110.89
  episode_media: {}
  episode_reward_max: 14.010000000000023
  episode_reward_mean: 6.96670000000002
  episode_reward_min: -1.1200000000000008
  episodes_this_iter: 18
  episodes_total: 2873
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.4784684430985224
          entropy_coeff: 0.009999999999999998
          kl: 0.01256172972020859
          policy_loss: 0.00024830973928882964
          total_loss: 0.3917773618229798
          vf_explained_var: 0.9027294516563416
          vf_loss: 0.3935949812332789
    num_agent_steps_sampled: 313686
    num_agent_steps_trained: 313686
    num_steps_sampled: 313686
    num_steps_trained: 313686
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,157,4140.78,313686,6.9667,14.01,-1.12,110.89


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 315684
  custom_metrics: {}
  date: 2021-11-09_04-54-41
  done: false
  episode_len_mean: 110.3
  episode_media: {}
  episode_reward_max: 14.010000000000023
  episode_reward_mean: 7.101600000000019
  episode_reward_min: -0.07
  episodes_this_iter: 19
  episodes_total: 2892
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.5275228250594366
          entropy_coeff: 0.009999999999999998
          kl: 0.00897992144954299
          policy_loss: -0.07447296176992711
          total_loss: 0.133515510424262
          vf_explained_var: 0.9504634141921997
          vf_loss: 0.21417153030633926
    num_agent_steps_sampled: 315684
    num_agent_steps_trained: 315684
    num_steps_sampled: 315684
    num_steps_trained: 315684
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,158,4164.37,315684,7.1016,14.01,-0.07,110.3


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 317682
  custom_metrics: {}
  date: 2021-11-09_04-55-03
  done: false
  episode_len_mean: 111.62
  episode_media: {}
  episode_reward_max: 14.010000000000023
  episode_reward_mean: 7.0770000000000195
  episode_reward_min: -0.07
  episodes_this_iter: 16
  episodes_total: 2908
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.4483274618784587
          entropy_coeff: 0.009999999999999998
          kl: 0.009985652372088942
          policy_loss: -0.03115958110207603
          total_loss: 0.35482489093251174
          vf_explained_var: 0.895455002784729
          vf_loss: 0.39035727374610446
    num_agent_steps_sampled: 317682
    num_agent_steps_trained: 317682
    num_steps_sampled: 317682
    num_steps_trained: 317682
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,159,4186.3,317682,7.077,14.01,-0.07,111.62


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 319680
  custom_metrics: {}
  date: 2021-11-09_04-55-26
  done: false
  episode_len_mean: 111.08
  episode_media: {}
  episode_reward_max: 13.880000000000019
  episode_reward_mean: 6.98970000000002
  episode_reward_min: -0.07
  episodes_this_iter: 18
  episodes_total: 2926
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.4380874179658436
          entropy_coeff: 0.009999999999999998
          kl: 0.00802669545776622
          policy_loss: -0.0060116816489469435
          total_loss: 0.25200925370057425
          vf_explained_var: 0.947057843208313
          vf_loss: 0.26427478141018323
    num_agent_steps_sampled: 319680
    num_agent_steps_trained: 319680
    num_steps_sampled: 319680
    num_steps_trained: 319680
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,160,4210.11,319680,6.9897,13.88,-0.07,111.08


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 321678
  custom_metrics: {}
  date: 2021-11-09_04-55-50
  done: false
  episode_len_mean: 110.15
  episode_media: {}
  episode_reward_max: 13.880000000000019
  episode_reward_mean: 6.933100000000017
  episode_reward_min: -0.07
  episodes_this_iter: 19
  episodes_total: 2945
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.560276704742795
          entropy_coeff: 0.009999999999999998
          kl: 0.01072091653046831
          policy_loss: -0.01980140448680946
          total_loss: 0.2967786318489483
          vf_explained_var: 0.9236714839935303
          vf_loss: 0.32132787108421323
    num_agent_steps_sampled: 321678
    num_agent_steps_trained: 321678
    num_steps_sampled: 321678
    num_steps_trained: 321678
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,161,4234.16,321678,6.9331,13.88,-0.07,110.15


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 323676
  custom_metrics: {}
  date: 2021-11-09_04-56-14
  done: false
  episode_len_mean: 112.4
  episode_media: {}
  episode_reward_max: 13.880000000000019
  episode_reward_mean: 7.123500000000022
  episode_reward_min: 2.5700000000000127
  episodes_this_iter: 17
  episodes_total: 2962
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.5153870446341378
          entropy_coeff: 0.009999999999999998
          kl: 0.00833727192801034
          policy_loss: -0.00889404687498297
          total_loss: 0.24538212862043154
          vf_explained_var: 0.9269796013832092
          vf_loss: 0.2609885555292879
    num_agent_steps_sampled: 323676
    num_agent_steps_trained: 323676
    num_steps_sampled: 323676
    num_steps_trained: 323676
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,162,4257.68,323676,7.1235,13.88,2.57,112.4


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 325674
  custom_metrics: {}
  date: 2021-11-09_04-56-38
  done: false
  episode_len_mean: 112.52
  episode_media: {}
  episode_reward_max: 13.880000000000019
  episode_reward_mean: 7.37010000000002
  episode_reward_min: 2.5700000000000127
  episodes_this_iter: 18
  episodes_total: 2980
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.5539642033122836
          entropy_coeff: 0.009999999999999998
          kl: 0.008979231877874255
          policy_loss: -0.04722694231285935
          total_loss: 0.18591425248554774
          vf_explained_var: 0.9536747336387634
          vf_loss: 0.23958936447188967
    num_agent_steps_sampled: 325674
    num_agent_steps_trained: 325674
    num_steps_sampled: 325674
    num_steps_trained: 325674
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,163,4281.19,325674,7.3701,13.88,2.57,112.52


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 327672
  custom_metrics: {}
  date: 2021-11-09_04-57-02
  done: false
  episode_len_mean: 110.84
  episode_media: {}
  episode_reward_max: 13.880000000000019
  episode_reward_mean: 7.13680000000002
  episode_reward_min: 2.5700000000000127
  episodes_this_iter: 19
  episodes_total: 2999
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.4830062661852155
          entropy_coeff: 0.009999999999999998
          kl: 0.008812424608373642
          policy_loss: -0.025478823926477205
          total_loss: 0.30625487989967776
          vf_explained_var: 0.9288211464881897
          vf_loss: 0.337641187508901
    num_agent_steps_sampled: 327672
    num_agent_steps_trained: 327672
    num_steps_sampled: 327672
    num_steps_trained: 327672
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,164,4305.44,327672,7.1368,13.88,2.57,110.84


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 329670
  custom_metrics: {}
  date: 2021-11-09_04-57-26
  done: false
  episode_len_mean: 111.1
  episode_media: {}
  episode_reward_max: 13.880000000000019
  episode_reward_mean: 7.269100000000019
  episode_reward_min: 2.5700000000000127
  episodes_this_iter: 18
  episodes_total: 3017
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.5446481324377515
          entropy_coeff: 0.009999999999999998
          kl: 0.010163063325630007
          policy_loss: 0.0061526678679954434
          total_loss: 0.2924919461210569
          vf_explained_var: 0.9194092750549316
          vf_loss: 0.29149565739291056
    num_agent_steps_sampled: 329670
    num_agent_steps_trained: 329670
    num_steps_sampled: 329670
    num_steps_trained: 329670
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,165,4329.99,329670,7.2691,13.88,2.57,111.1


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 331668
  custom_metrics: {}
  date: 2021-11-09_04-57-51
  done: false
  episode_len_mean: 108.81
  episode_media: {}
  episode_reward_max: 13.770000000000026
  episode_reward_mean: 7.061700000000019
  episode_reward_min: 2.580000000000016
  episodes_this_iter: 20
  episodes_total: 3037
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.4848793012755257
          entropy_coeff: 0.009999999999999998
          kl: 0.006816112331619117
          policy_loss: -0.02886978542166097
          total_loss: 0.1523721597246116
          vf_explained_var: 0.9439454078674316
          vf_loss: 0.18918942418836412
    num_agent_steps_sampled: 331668
    num_agent_steps_trained: 331668
    num_steps_sampled: 331668
    num_steps_trained: 331668
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,166,4354.81,331668,7.0617,13.77,2.58,108.81


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 333666
  custom_metrics: {}
  date: 2021-11-09_04-58-15
  done: false
  episode_len_mean: 108.24
  episode_media: {}
  episode_reward_max: 13.770000000000026
  episode_reward_mean: 7.056100000000018
  episode_reward_min: 2.580000000000016
  episodes_this_iter: 18
  episodes_total: 3055
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.5227211515108745
          entropy_coeff: 0.009999999999999998
          kl: 0.01371973551438755
          policy_loss: -0.04439199907439095
          total_loss: 0.2742430556033339
          vf_explained_var: 0.9174363613128662
          vf_loss: 0.3199710307376725
    num_agent_steps_sampled: 333666
    num_agent_steps_trained: 333666
    num_steps_sampled: 333666
    num_steps_trained: 333666
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,167,4378.52,333666,7.0561,13.77,2.58,108.24


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 335664
  custom_metrics: {}
  date: 2021-11-09_04-58-39
  done: false
  episode_len_mean: 107.9
  episode_media: {}
  episode_reward_max: 13.770000000000026
  episode_reward_mean: 7.008500000000018
  episode_reward_min: 2.580000000000016
  episodes_this_iter: 18
  episodes_total: 3073
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.527630649294172
          entropy_coeff: 0.009999999999999998
          kl: 0.00906033604000723
          policy_loss: -0.04191875177479926
          total_loss: 0.2486942297025096
          vf_explained_var: 0.926408052444458
          vf_loss: 0.2967156959431512
    num_agent_steps_sampled: 335664
    num_agent_steps_trained: 335664
    num_steps_sampled: 335664
    num_steps_trained: 335664
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,168,4402.59,335664,7.0085,13.77,2.58,107.9


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 337662
  custom_metrics: {}
  date: 2021-11-09_04-59-02
  done: false
  episode_len_mean: 109.01
  episode_media: {}
  episode_reward_max: 13.770000000000026
  episode_reward_mean: 7.15430000000002
  episode_reward_min: 2.580000000000016
  episodes_this_iter: 17
  episodes_total: 3090
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.5108513877505347
          entropy_coeff: 0.009999999999999998
          kl: 0.010209219546631567
          policy_loss: -0.010912171396471205
          total_loss: 0.31317483192043644
          vf_explained_var: 0.9356394410133362
          vf_loss: 0.3288586798168364
    num_agent_steps_sampled: 337662
    num_agent_steps_trained: 337662
    num_steps_sampled: 337662
    num_steps_trained: 337662
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,169,4425.41,337662,7.1543,13.77,2.58,109.01


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 339660
  custom_metrics: {}
  date: 2021-11-09_04-59-25
  done: false
  episode_len_mean: 109.97
  episode_media: {}
  episode_reward_max: 13.880000000000022
  episode_reward_mean: 7.28420000000002
  episode_reward_min: 2.750000000000016
  episodes_this_iter: 18
  episodes_total: 3108
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.4412285691215878
          entropy_coeff: 0.009999999999999998
          kl: 0.011049054365065982
          policy_loss: -0.015850448076214108
          total_loss: 0.2870869188613835
          vf_explained_var: 0.9216423630714417
          vf_loss: 0.30616248512551897
    num_agent_steps_sampled: 339660
    num_agent_steps_trained: 339660
    num_steps_sampled: 339660
    num_steps_trained: 339660
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,170,4448.58,339660,7.2842,13.88,2.75,109.97


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 341658
  custom_metrics: {}
  date: 2021-11-09_04-59-48
  done: false
  episode_len_mean: 110.26
  episode_media: {}
  episode_reward_max: 13.880000000000022
  episode_reward_mean: 7.1848000000000205
  episode_reward_min: 2.8900000000000143
  episodes_this_iter: 18
  episodes_total: 3126
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.407319652466547
          entropy_coeff: 0.009999999999999998
          kl: 0.008419253438297417
          policy_loss: 0.0035014226855266664
          total_loss: 0.2503793896619408
          vf_explained_var: 0.9241684675216675
          vf_loss: 0.2524266710593587
    num_agent_steps_sampled: 341658
    num_agent_steps_trained: 341658
    num_steps_sampled: 341658
    num_steps_trained: 341658
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,171,4471.75,341658,7.1848,13.88,2.89,110.26


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 343656
  custom_metrics: {}
  date: 2021-11-09_05-00-12
  done: false
  episode_len_mean: 110.93
  episode_media: {}
  episode_reward_max: 13.880000000000022
  episode_reward_mean: 7.470200000000019
  episode_reward_min: 2.8900000000000143
  episodes_this_iter: 19
  episodes_total: 3145
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.50254507178352
          entropy_coeff: 0.009999999999999998
          kl: 0.009285916016637778
          policy_loss: -0.04316062756947109
          total_loss: 0.17022601803321213
          vf_explained_var: 0.9531794786453247
          vf_loss: 0.2190101063499848
    num_agent_steps_sampled: 343656
    num_agent_steps_trained: 343656
    num_steps_sampled: 343656
    num_steps_trained: 343656
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,172,4495.55,343656,7.4702,13.88,2.89,110.93


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 345654
  custom_metrics: {}
  date: 2021-11-09_05-00-36
  done: false
  episode_len_mean: 111.57
  episode_media: {}
  episode_reward_max: 13.880000000000022
  episode_reward_mean: 7.69900000000002
  episode_reward_min: 4.10000000000002
  episodes_this_iter: 17
  episodes_total: 3162
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.4639772137006124
          entropy_coeff: 0.009999999999999998
          kl: 0.011131358878777236
          policy_loss: -0.03741113165659564
          total_loss: 0.31295572115729253
          vf_explained_var: 0.9374841451644897
          vf_loss: 0.3537361231588182
    num_agent_steps_sampled: 345654
    num_agent_steps_trained: 345654
    num_steps_sampled: 345654
    num_steps_trained: 345654
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,173,4518.97,345654,7.699,13.88,4.1,111.57


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 347652
  custom_metrics: {}
  date: 2021-11-09_05-00-59
  done: false
  episode_len_mean: 110.98
  episode_media: {}
  episode_reward_max: 13.880000000000022
  episode_reward_mean: 7.67480000000002
  episode_reward_min: 4.210000000000019
  episodes_this_iter: 19
  episodes_total: 3181
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.4948701529275803
          entropy_coeff: 0.009999999999999998
          kl: 0.008655602540593187
          policy_loss: -0.051333104446530345
          total_loss: 0.1618297178298235
          vf_explained_var: 0.9540862441062927
          vf_loss: 0.21934772685524964
    num_agent_steps_sampled: 347652
    num_agent_steps_trained: 347652
    num_steps_sampled: 347652
    num_steps_trained: 347652
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,174,4542.18,347652,7.6748,13.88,4.21,110.98




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 349650
  custom_metrics: {}
  date: 2021-11-09_05-01-39
  done: false
  episode_len_mean: 109.55
  episode_media: {}
  episode_reward_max: 13.880000000000022
  episode_reward_mean: 7.54760000000002
  episode_reward_min: -0.15000000000000002
  episodes_this_iter: 19
  episodes_total: 3200
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.4751422479039147
          entropy_coeff: 0.009999999999999998
          kl: 0.010673448871619896
          policy_loss: -0.022728771538961502
          total_loss: 0.2758964895137719
          vf_explained_var: 0.9342378973960876
          vf_loss: 0.3025698150197665
    num_agent_steps_sampled: 349650
    num_agent_steps_trained: 349650
    num_steps_sampled: 349650
    num_steps_trained: 349650
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,175,4582.19,349650,7.5476,13.88,-0.15,109.55




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 351648
  custom_metrics: {}
  date: 2021-11-09_05-02-34
  done: false
  episode_len_mean: 105.6
  episode_media: {}
  episode_reward_max: 13.870000000000022
  episode_reward_mean: 7.47150000000002
  episode_reward_min: -0.15000000000000002
  episodes_this_iter: 21
  episodes_total: 3221
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3819775206702096
          entropy_coeff: 0.009999999999999998
          kl: 0.00796979460140831
          policy_loss: -0.023330905785163242
          total_loss: 0.2574172794375391
          vf_explained_var: 0.9411168694496155
          vf_loss: 0.2864985390788033
    num_agent_steps_sampled: 351648
    num_agent_steps_trained: 351648
    num_steps_sampled: 351648
    num_steps_trained: 351648
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,176,4636.83,351648,7.4715,13.87,-0.15,105.6


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 353646
  custom_metrics: {}
  date: 2021-11-09_05-02-58
  done: false
  episode_len_mean: 104.9
  episode_media: {}
  episode_reward_max: 13.870000000000022
  episode_reward_mean: 7.280300000000018
  episode_reward_min: -0.15000000000000002
  episodes_this_iter: 19
  episodes_total: 3240
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.561210712932405
          entropy_coeff: 0.009999999999999998
          kl: 0.007439239030980529
          policy_loss: -0.04079155897100766
          total_loss: 0.16585679323900313
          vf_explained_var: 0.9266918897628784
          vf_loss: 0.2147282313732874
    num_agent_steps_sampled: 353646
    num_agent_steps_trained: 353646
    num_steps_sampled: 353646
    num_steps_trained: 353646
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,177,4661.61,353646,7.2803,13.87,-0.15,104.9


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 355644
  custom_metrics: {}
  date: 2021-11-09_05-03-24
  done: false
  episode_len_mean: 103.49
  episode_media: {}
  episode_reward_max: 13.320000000000023
  episode_reward_mean: 6.9813000000000205
  episode_reward_min: -0.15000000000000002
  episodes_this_iter: 19
  episodes_total: 3259
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.5265572144871666
          entropy_coeff: 0.009999999999999998
          kl: 0.009694122747173258
          policy_loss: -0.04366251028009823
          total_loss: 0.26212661145698457
          vf_explained_var: 0.9328399896621704
          vf_loss: 0.31123939284256524
    num_agent_steps_sampled: 355644
    num_agent_steps_trained: 355644
    num_steps_sampled: 355644
    num_steps_trained: 355644
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,178,4686.71,355644,6.9813,13.32,-0.15,103.49


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 357642
  custom_metrics: {}
  date: 2021-11-09_05-03-47
  done: false
  episode_len_mean: 102.88
  episode_media: {}
  episode_reward_max: 13.560000000000024
  episode_reward_mean: 6.983100000000019
  episode_reward_min: -0.15000000000000002
  episodes_this_iter: 19
  episodes_total: 3278
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.644693198090508
          entropy_coeff: 0.009999999999999998
          kl: 0.010254308397025355
          policy_loss: -0.05535655392422562
          total_loss: 0.16120240599626587
          vf_explained_var: 0.9433718919754028
          vf_loss: 0.2226234035300357
    num_agent_steps_sampled: 357642
    num_agent_steps_trained: 357642
    num_steps_sampled: 357642
    num_steps_trained: 357642
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,179,4710.31,357642,6.9831,13.56,-0.15,102.88


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 359640
  custom_metrics: {}
  date: 2021-11-09_05-04-11
  done: false
  episode_len_mean: 101.85
  episode_media: {}
  episode_reward_max: 13.560000000000024
  episode_reward_mean: 7.017300000000018
  episode_reward_min: -0.15000000000000002
  episodes_this_iter: 19
  episodes_total: 3297
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.4137522952897208
          entropy_coeff: 0.009999999999999998
          kl: 0.0138743821772424
          policy_loss: 0.0044541473189989725
          total_loss: 0.43096484139206864
          vf_explained_var: 0.937518298625946
          vf_loss: 0.42660040459817367
    num_agent_steps_sampled: 359640
    num_agent_steps_trained: 359640
    num_steps_sampled: 359640
    num_steps_trained: 359640
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,180,4734.03,359640,7.0173,13.56,-0.15,101.85


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 361638
  custom_metrics: {}
  date: 2021-11-09_05-04-34
  done: false
  episode_len_mean: 105.86
  episode_media: {}
  episode_reward_max: 13.560000000000024
  episode_reward_mean: 7.268400000000018
  episode_reward_min: 2.620000000000012
  episodes_this_iter: 18
  episodes_total: 3315
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.490531077271416
          entropy_coeff: 0.009999999999999998
          kl: 0.008414433181799958
          policy_loss: -0.031655286926598775
          total_loss: 0.1698241106101445
          vf_explained_var: 0.9622381329536438
          vf_loss: 0.20786509327590466
    num_agent_steps_sampled: 361638
    num_agent_steps_trained: 361638
    num_steps_sampled: 361638
    num_steps_trained: 361638
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,181,4757.18,361638,7.2684,13.56,2.62,105.86


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 363636
  custom_metrics: {}
  date: 2021-11-09_05-04-58
  done: false
  episode_len_mean: 107.11
  episode_media: {}
  episode_reward_max: 13.560000000000024
  episode_reward_mean: 7.348900000000019
  episode_reward_min: 2.620000000000012
  episodes_this_iter: 18
  episodes_total: 3333
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.5405912490118117
          entropy_coeff: 0.009999999999999998
          kl: 0.007383843216025653
          policy_loss: -0.054246283446749054
          total_loss: 0.1251230173345123
          vf_explained_var: 0.9547563791275024
          vf_loss: 0.18729907150069872
    num_agent_steps_sampled: 363636
    num_agent_steps_trained: 363636
    num_steps_sampled: 363636
    num_steps_trained: 363636
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,182,4780.75,363636,7.3489,13.56,2.62,107.11


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 365634
  custom_metrics: {}
  date: 2021-11-09_05-05-22
  done: false
  episode_len_mean: 107.68
  episode_media: {}
  episode_reward_max: 13.670000000000021
  episode_reward_mean: 7.3952000000000195
  episode_reward_min: 2.8700000000000156
  episodes_this_iter: 19
  episodes_total: 3352
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.4788837608836947
          entropy_coeff: 0.009999999999999998
          kl: 0.010346902620208133
          policy_loss: -0.0031489738396235875
          total_loss: 0.27389230953662524
          vf_explained_var: 0.9517612457275391
          vf_loss: 0.281353884154842
    num_agent_steps_sampled: 365634
    num_agent_steps_trained: 365634
    num_steps_sampled: 365634
    num_steps_trained: 365634
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,183,4805.17,365634,7.3952,13.67,2.87,107.68


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 367632
  custom_metrics: {}
  date: 2021-11-09_05-05-46
  done: false
  episode_len_mean: 107.86
  episode_media: {}
  episode_reward_max: 14.300000000000022
  episode_reward_mean: 7.7824000000000195
  episode_reward_min: 3.0800000000000107
  episodes_this_iter: 18
  episodes_total: 3370
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3881896098454793
          entropy_coeff: 0.009999999999999998
          kl: 0.009454664274370406
          policy_loss: -0.05250021446318853
          total_loss: 0.16303338388069755
          vf_explained_var: 0.9614067673683167
          vf_loss: 0.21984264330849762
    num_agent_steps_sampled: 367632
    num_agent_steps_trained: 367632
    num_steps_sampled: 367632
    num_steps_trained: 367632
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,184,4828.54,367632,7.7824,14.3,3.08,107.86


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 369630
  custom_metrics: {}
  date: 2021-11-09_05-06-09
  done: false
  episode_len_mean: 108.15
  episode_media: {}
  episode_reward_max: 14.300000000000022
  episode_reward_mean: 7.701000000000019
  episode_reward_min: 0.8000000000000129
  episodes_this_iter: 18
  episodes_total: 3388
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.5431685708817982
          entropy_coeff: 0.009999999999999998
          kl: 0.015591636628147812
          policy_loss: -0.029219376650594528
          total_loss: 0.39642133289682013
          vf_explained_var: 0.9426615238189697
          vf_loss: 0.4252858643375692
    num_agent_steps_sampled: 369630
    num_agent_steps_trained: 369630
    num_steps_sampled: 369630
    num_steps_trained: 369630
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,185,4852.09,369630,7.701,14.3,0.8,108.15


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 371628
  custom_metrics: {}
  date: 2021-11-09_05-06-32
  done: false
  episode_len_mean: 110.33
  episode_media: {}
  episode_reward_max: 14.300000000000022
  episode_reward_mean: 7.821500000000022
  episode_reward_min: 0.8000000000000129
  episodes_this_iter: 18
  episodes_total: 3406
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.4291210237003509
          entropy_coeff: 0.009999999999999998
          kl: 0.012594820834648576
          policy_loss: -0.011834280476683662
          total_loss: 0.5139242080705506
          vf_explained_var: 0.8984050750732422
          vf_loss: 0.5272974401712418
    num_agent_steps_sampled: 371628
    num_agent_steps_trained: 371628
    num_steps_sampled: 371628
    num_steps_trained: 371628
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,186,4875.32,371628,7.8215,14.3,0.8,110.33


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 373626
  custom_metrics: {}
  date: 2021-11-09_05-06-56
  done: false
  episode_len_mean: 109.99
  episode_media: {}
  episode_reward_max: 14.320000000000022
  episode_reward_mean: 7.872200000000018
  episode_reward_min: 0.8000000000000129
  episodes_this_iter: 18
  episodes_total: 3424
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.4985514544305347
          entropy_coeff: 0.009999999999999998
          kl: 0.00878010784502944
          policy_loss: -0.00995117619278885
          total_loss: 0.1907026415069898
          vf_explained_var: 0.9580772519111633
          vf_loss: 0.2067494703545457
    num_agent_steps_sampled: 373626
    num_agent_steps_trained: 373626
    num_steps_sampled: 373626
    num_steps_trained: 373626
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,187,4898.54,373626,7.8722,14.32,0.8,109.99


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 375624
  custom_metrics: {}
  date: 2021-11-09_05-07-20
  done: false
  episode_len_mean: 108.99
  episode_media: {}
  episode_reward_max: 14.320000000000022
  episode_reward_mean: 7.971400000000018
  episode_reward_min: 0.8000000000000129
  episodes_this_iter: 19
  episodes_total: 3443
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.4574941873550415
          entropy_coeff: 0.009999999999999998
          kl: 0.006794669701181007
          policy_loss: 0.002602244523309526
          total_loss: 0.14632243911425272
          vf_explained_var: 0.9688791632652283
          vf_loss: 0.15141553250806672
    num_agent_steps_sampled: 375624
    num_agent_steps_trained: 375624
    num_steps_sampled: 375624
    num_steps_trained: 375624
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,188,4923.09,375624,7.9714,14.32,0.8,108.99


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 377622
  custom_metrics: {}
  date: 2021-11-09_05-07-45
  done: false
  episode_len_mean: 108.81
  episode_media: {}
  episode_reward_max: 14.320000000000022
  episode_reward_mean: 7.774000000000018
  episode_reward_min: 0.8000000000000129
  episodes_this_iter: 19
  episodes_total: 3462
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.4978817542394003
          entropy_coeff: 0.009999999999999998
          kl: 0.008419868293012996
          policy_loss: -0.06657345988565967
          total_loss: 0.13721758062463432
          vf_explained_var: 0.9521040320396423
          vf_loss: 0.21024474242613428
    num_agent_steps_sampled: 377622
    num_agent_steps_trained: 377622
    num_steps_sampled: 377622
    num_steps_trained: 377622
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,189,4948.01,377622,7.774,14.32,0.8,108.81


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 379620
  custom_metrics: {}
  date: 2021-11-09_05-08-11
  done: false
  episode_len_mean: 107.35
  episode_media: {}
  episode_reward_max: 14.440000000000015
  episode_reward_mean: 7.720400000000019
  episode_reward_min: 2.6400000000000197
  episodes_this_iter: 20
  episodes_total: 3482
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.5071627946127029
          entropy_coeff: 0.009999999999999998
          kl: 0.009698442327594746
          policy_loss: -0.04198970865635645
          total_loss: 0.21220657122099684
          vf_explained_var: 0.9494422674179077
          vf_loss: 0.25944823266140055
    num_agent_steps_sampled: 379620
    num_agent_steps_trained: 379620
    num_steps_sampled: 379620
    num_steps_trained: 379620
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,190,4973.66,379620,7.7204,14.44,2.64,107.35


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 381618
  custom_metrics: {}
  date: 2021-11-09_05-08-36
  done: false
  episode_len_mean: 105.07
  episode_media: {}
  episode_reward_max: 14.440000000000015
  episode_reward_mean: 7.315600000000021
  episode_reward_min: 0.44000000000000616
  episodes_this_iter: 20
  episodes_total: 3502
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3981157177970522
          entropy_coeff: 0.009999999999999998
          kl: 0.009246021249142394
          policy_loss: -0.049427501112222674
          total_loss: 0.2586672355509585
          vf_explained_var: 0.9462928175926208
          vf_loss: 0.312714298140435
    num_agent_steps_sampled: 381618
    num_agent_steps_trained: 381618
    num_steps_sampled: 381618
    num_steps_trained: 381618
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,191,4998.51,381618,7.3156,14.44,0.44,105.07


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 383616
  custom_metrics: {}
  date: 2021-11-09_05-09-02
  done: false
  episode_len_mean: 103.96
  episode_media: {}
  episode_reward_max: 14.440000000000015
  episode_reward_mean: 7.174700000000018
  episode_reward_min: 0.44000000000000616
  episodes_this_iter: 19
  episodes_total: 3521
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.5181001816477095
          entropy_coeff: 0.009999999999999998
          kl: 0.009817984707436885
          policy_loss: -0.02197928269881578
          total_loss: 0.20478002237422124
          vf_explained_var: 0.9446086883544922
          vf_loss: 0.23199959580032598
    num_agent_steps_sampled: 383616
    num_agent_steps_trained: 383616
    num_steps_sampled: 383616
    num_steps_trained: 383616
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,192,5024.53,383616,7.1747,14.44,0.44,103.96


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 385614
  custom_metrics: {}
  date: 2021-11-09_05-09-28
  done: false
  episode_len_mean: 102.44
  episode_media: {}
  episode_reward_max: 14.440000000000015
  episode_reward_mean: 7.1518000000000175
  episode_reward_min: 0.44000000000000616
  episodes_this_iter: 20
  episodes_total: 3541
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.4524286457470486
          entropy_coeff: 0.009999999999999998
          kl: 0.01159645894296505
          policy_loss: -0.01326497235291061
          total_loss: 0.3697890547680713
          vf_explained_var: 0.9362902641296387
          vf_loss: 0.3858368979323478
    num_agent_steps_sampled: 385614
    num_agent_steps_trained: 385614
    num_steps_sampled: 385614
    num_steps_trained: 385614
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,193,5050.33,385614,7.1518,14.44,0.44,102.44




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 387612
  custom_metrics: {}
  date: 2021-11-09_05-10-08
  done: false
  episode_len_mean: 100.68
  episode_media: {}
  episode_reward_max: 14.440000000000015
  episode_reward_mean: 6.937800000000018
  episode_reward_min: -0.04
  episodes_this_iter: 21
  episodes_total: 3562
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.4396630661828178
          entropy_coeff: 0.009999999999999998
          kl: 0.011529806138920492
          policy_loss: 0.015038310133275532
          total_loss: 0.344733297097541
          vf_explained_var: 0.9212178587913513
          vf_loss: 0.3324176903459288
    num_agent_steps_sampled: 387612
    num_agent_steps_trained: 387612
    num_steps_sampled: 387612
    num_steps_trained: 387612
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,194,5090.56,387612,6.9378,14.44,-0.04,100.68




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 389610
  custom_metrics: {}
  date: 2021-11-09_05-10-51
  done: false
  episode_len_mean: 99.67
  episode_media: {}
  episode_reward_max: 13.86000000000002
  episode_reward_mean: 6.704700000000018
  episode_reward_min: -0.04
  episodes_this_iter: 20
  episodes_total: 3582
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.5597428719202677
          entropy_coeff: 0.009999999999999998
          kl: 0.010701868429274286
          policy_loss: -0.01674347875994586
          total_loss: 0.4094330354460648
          vf_explained_var: 0.8870644569396973
          vf_loss: 0.4309382993550528
    num_agent_steps_sampled: 389610
    num_agent_steps_trained: 389610
    num_steps_sampled: 389610
    num_steps_trained: 389610
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,195,5133.65,389610,6.7047,13.86,-0.04,99.67


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 391608
  custom_metrics: {}
  date: 2021-11-09_05-11-17
  done: false
  episode_len_mean: 98.65
  episode_media: {}
  episode_reward_max: 14.130000000000022
  episode_reward_mean: 6.572100000000017
  episode_reward_min: -0.04
  episodes_this_iter: 20
  episodes_total: 3602
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.4875768638792493
          entropy_coeff: 0.009999999999999998
          kl: 0.00904314124000431
          policy_loss: -0.011762634469639687
          total_loss: 0.3115492763441233
          vf_explained_var: 0.9198101162910461
          vf_loss: 0.32903149802060355
    num_agent_steps_sampled: 391608
    num_agent_steps_trained: 391608
    num_steps_sampled: 391608
    num_steps_trained: 391608
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,196,5159.49,391608,6.5721,14.13,-0.04,98.65


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 393606
  custom_metrics: {}
  date: 2021-11-09_05-11-43
  done: false
  episode_len_mean: 99.12
  episode_media: {}
  episode_reward_max: 14.130000000000022
  episode_reward_mean: 6.631200000000018
  episode_reward_min: -0.04
  episodes_this_iter: 20
  episodes_total: 3622
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.4622323689006624
          entropy_coeff: 0.009999999999999998
          kl: 0.008477210689013186
          policy_loss: -0.07716036994187604
          total_loss: 0.17015582559009393
          vf_explained_var: 0.9367955327033997
          vf_loss: 0.2533553443849087
    num_agent_steps_sampled: 393606
    num_agent_steps_trained: 393606
    num_steps_sampled: 393606
    num_steps_trained: 393606
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,197,5185.42,393606,6.6312,14.13,-0.04,99.12


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 395604
  custom_metrics: {}
  date: 2021-11-09_05-12-08
  done: false
  episode_len_mean: 99.41
  episode_media: {}
  episode_reward_max: 14.160000000000018
  episode_reward_mean: 6.643000000000018
  episode_reward_min: -0.04
  episodes_this_iter: 20
  episodes_total: 3642
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.425427964755467
          entropy_coeff: 0.009999999999999998
          kl: 0.012499702162665091
          policy_loss: -0.05903983749449253
          total_loss: 0.30324050133072195
          vf_explained_var: 0.9188843369483948
          vf_loss: 0.36387867026385806
    num_agent_steps_sampled: 395604
    num_agent_steps_trained: 395604
    num_steps_sampled: 395604
    num_steps_trained: 395604
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,198,5210.91,395604,6.643,14.16,-0.04,99.41


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 397602
  custom_metrics: {}
  date: 2021-11-09_05-12-34
  done: false
  episode_len_mean: 99.84
  episode_media: {}
  episode_reward_max: 14.300000000000018
  episode_reward_mean: 6.796000000000018
  episode_reward_min: -0.04
  episodes_this_iter: 19
  episodes_total: 3661
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3239491133462815
          entropy_coeff: 0.009999999999999998
          kl: 0.022372992231369692
          policy_loss: -0.010094381771272138
          total_loss: 0.4969385171131719
          vf_explained_var: 0.9236415028572083
          vf_loss: 0.49761973108564106
    num_agent_steps_sampled: 397602
    num_agent_steps_trained: 397602
    num_steps_sampled: 397602
    num_steps_trained: 397602
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,199,5236.25,397602,6.796,14.3,-0.04,99.84


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 399600
  custom_metrics: {}
  date: 2021-11-09_05-12-59
  done: false
  episode_len_mean: 101.82
  episode_media: {}
  episode_reward_max: 14.300000000000018
  episode_reward_mean: 7.2150000000000185
  episode_reward_min: 0.9000000000000155
  episodes_this_iter: 19
  episodes_total: 3680
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.000000000000001e-05
          entropy: 1.5102348367373148
          entropy_coeff: 0.009999999999999998
          kl: 0.005987592978732724
          policy_loss: 0.004316509124778566
          total_loss: 0.19796461682944072
          vf_explained_var: 0.9547319412231445
          vf_loss: 0.19965679860186009
    num_agent_steps_sampled: 399600
    num_agent_steps_trained: 399600
    num_steps_sampled: 399600
    num_steps_trained: 3996

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,200,5261.05,399600,7.215,14.3,0.9,101.82


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 401598
  custom_metrics: {}
  date: 2021-11-09_05-13-24
  done: false
  episode_len_mean: 102.25
  episode_media: {}
  episode_reward_max: 14.300000000000018
  episode_reward_mean: 7.38390000000002
  episode_reward_min: 0.9000000000000155
  episodes_this_iter: 21
  episodes_total: 3701
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.000000000000001e-05
          entropy: 1.4527187194143023
          entropy_coeff: 0.009999999999999998
          kl: 0.007611540033007009
          policy_loss: -0.019299078626292092
          total_loss: 0.33173711603241307
          vf_explained_var: 0.94244384765625
          vf_loss: 0.3540033568228994
    num_agent_steps_sampled: 401598
    num_agent_steps_trained: 401598
    num_steps_sampled: 401598
    num_steps_trained: 401598
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,201,5286.23,401598,7.3839,14.3,0.9,102.25


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 403596
  custom_metrics: {}
  date: 2021-11-09_05-13-50
  done: false
  episode_len_mean: 101.31
  episode_media: {}
  episode_reward_max: 14.300000000000018
  episode_reward_mean: 7.545600000000018
  episode_reward_min: 0.9000000000000155
  episodes_this_iter: 20
  episodes_total: 3721
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.000000000000001e-05
          entropy: 1.4078906030881972
          entropy_coeff: 0.009999999999999998
          kl: 0.0082239185134488
          policy_loss: -0.02033132401605447
          total_loss: 0.24854694319268067
          vf_explained_var: 0.9358932971954346
          vf_loss: 0.2704670977024805
    num_agent_steps_sampled: 403596
    num_agent_steps_trained: 403596
    num_steps_sampled: 403596
    num_steps_trained: 403596
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,202,5312,403596,7.5456,14.3,0.9,101.31


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 405594
  custom_metrics: {}
  date: 2021-11-09_05-14-15
  done: false
  episode_len_mean: 100.63
  episode_media: {}
  episode_reward_max: 14.300000000000018
  episode_reward_mean: 7.471700000000018
  episode_reward_min: 0.9000000000000155
  episodes_this_iter: 19
  episodes_total: 3740
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.000000000000001e-05
          entropy: 1.3676210454532078
          entropy_coeff: 0.009999999999999998
          kl: 0.013777990252600172
          policy_loss: 0.007458950135679472
          total_loss: 0.3488426962601287
          vf_explained_var: 0.9389707446098328
          vf_loss: 0.33413463603882565
    num_agent_steps_sampled: 405594
    num_agent_steps_trained: 405594
    num_steps_sampled: 405594
    num_steps_trained: 405594

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,203,5337.65,405594,7.4717,14.3,0.9,100.63


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 407592
  custom_metrics: {}
  date: 2021-11-09_05-14-40
  done: false
  episode_len_mean: 100.74
  episode_media: {}
  episode_reward_max: 14.250000000000018
  episode_reward_mean: 7.387900000000017
  episode_reward_min: 0.9000000000000155
  episodes_this_iter: 20
  episodes_total: 3760
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.000000000000001e-05
          entropy: 1.3782679901236579
          entropy_coeff: 0.009999999999999998
          kl: 0.013068721813960129
          policy_loss: 0.005265944424484457
          total_loss: 0.3531583031905549
          vf_explained_var: 0.9376431703567505
          vf_loss: 0.34182691708916707
    num_agent_steps_sampled: 407592
    num_agent_steps_trained: 407592
    num_steps_sampled: 407592
    num_steps_trained: 407592

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,204,5362.38,407592,7.3879,14.25,0.9,100.74


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 409590
  custom_metrics: {}
  date: 2021-11-09_05-15-05
  done: false
  episode_len_mean: 100.62
  episode_media: {}
  episode_reward_max: 14.250000000000018
  episode_reward_mean: 7.559800000000017
  episode_reward_min: 0.9899999999999991
  episodes_this_iter: 19
  episodes_total: 3779
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.000000000000001e-05
          entropy: 1.3832927953629266
          entropy_coeff: 0.009999999999999998
          kl: 0.009311567854551344
          policy_loss: -0.03272637747937725
          total_loss: 0.3228616366517686
          vf_explained_var: 0.9489102959632874
          vf_loss: 0.35527899815213115
    num_agent_steps_sampled: 409590
    num_agent_steps_trained: 409590
    num_steps_sampled: 409590
    num_steps_trained: 409590

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,205,5387.48,409590,7.5598,14.25,0.99,100.62


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 411588
  custom_metrics: {}
  date: 2021-11-09_05-15-31
  done: false
  episode_len_mean: 100.62
  episode_media: {}
  episode_reward_max: 14.29000000000002
  episode_reward_mean: 7.737100000000019
  episode_reward_min: 0.9899999999999991
  episodes_this_iter: 20
  episodes_total: 3799
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.000000000000001e-05
          entropy: 1.3698470467612858
          entropy_coeff: 0.009999999999999998
          kl: 0.005793916313106651
          policy_loss: -0.0210768841561817
          total_loss: 0.2192581245675683
          vf_explained_var: 0.9524614810943604
          vf_loss: 0.245233967261655
    num_agent_steps_sampled: 411588
    num_agent_steps_trained: 411588
    num_steps_sampled: 411588
    num_steps_trained: 411588
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,206,5412.87,411588,7.7371,14.29,0.99,100.62


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 413586
  custom_metrics: {}
  date: 2021-11-09_05-15-54
  done: false
  episode_len_mean: 102.09
  episode_media: {}
  episode_reward_max: 14.29000000000002
  episode_reward_mean: 7.67710000000002
  episode_reward_min: 0.9899999999999991
  episodes_this_iter: 18
  episodes_total: 3817
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.000000000000001e-05
          entropy: 1.4270921559560867
          entropy_coeff: 0.009999999999999998
          kl: 0.009653939559028716
          policy_loss: -0.034379507468215054
          total_loss: 0.3482711355601038
          vf_explained_var: 0.932550311088562
          vf_loss: 0.3822596435745557
    num_agent_steps_sampled: 413586
    num_agent_steps_trained: 413586
    num_steps_sampled: 413586
    num_steps_trained: 413586
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,207,5436.56,413586,7.6771,14.29,0.99,102.09


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 415584
  custom_metrics: {}
  date: 2021-11-09_05-16-19
  done: false
  episode_len_mean: 103.89
  episode_media: {}
  episode_reward_max: 14.29000000000002
  episode_reward_mean: 7.983200000000019
  episode_reward_min: 0.9899999999999991
  episodes_this_iter: 19
  episodes_total: 3836
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.000000000000001e-05
          entropy: 1.3765817000752403
          entropy_coeff: 0.009999999999999998
          kl: 0.009518058929156427
          policy_loss: -0.04651528251845212
          total_loss: 0.2736314704730397
          vf_explained_var: 0.939660370349884
          vf_loss: 0.31945701787869135
    num_agent_steps_sampled: 415584
    num_agent_steps_trained: 415584
    num_steps_sampled: 415584
    num_steps_trained: 415584
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,208,5461.5,415584,7.9832,14.29,0.99,103.89


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 417582
  custom_metrics: {}
  date: 2021-11-09_05-16-44
  done: false
  episode_len_mean: 104.38
  episode_media: {}
  episode_reward_max: 14.29000000000002
  episode_reward_mean: 8.000100000000018
  episode_reward_min: 0.9899999999999991
  episodes_this_iter: 20
  episodes_total: 3856
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.000000000000001e-05
          entropy: 1.3685457490739368
          entropy_coeff: 0.009999999999999998
          kl: 0.013238719124394734
          policy_loss: -0.0432831039563531
          total_loss: 0.3477289775652545
          vf_explained_var: 0.932616651058197
          vf_loss: 0.38459123681698526
    num_agent_steps_sampled: 417582
    num_agent_steps_trained: 417582
    num_steps_sampled: 417582
    num_steps_trained: 417582
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,209,5486.59,417582,8.0001,14.29,0.99,104.38


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 419580
  custom_metrics: {}
  date: 2021-11-09_05-17-09
  done: false
  episode_len_mean: 105.33
  episode_media: {}
  episode_reward_max: 14.29000000000002
  episode_reward_mean: 8.25540000000002
  episode_reward_min: 2.360000000000018
  episodes_this_iter: 19
  episodes_total: 3875
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.000000000000001e-05
          entropy: 1.3125086369968595
          entropy_coeff: 0.009999999999999998
          kl: 0.00920022130048425
          policy_loss: -0.024417151714719476
          total_loss: 0.3695090220708932
          vf_explained_var: 0.9234254360198975
          vf_loss: 0.39307842212063926
    num_agent_steps_sampled: 419580
    num_agent_steps_trained: 419580
    num_steps_sampled: 419580
    num_steps_trained: 419580
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,210,5510.87,419580,8.2554,14.29,2.36,105.33


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 421578
  custom_metrics: {}
  date: 2021-11-09_05-17-33
  done: false
  episode_len_mean: 105.67
  episode_media: {}
  episode_reward_max: 14.29000000000002
  episode_reward_mean: 8.238200000000019
  episode_reward_min: 2.360000000000018
  episodes_this_iter: 18
  episodes_total: 3893
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.000000000000001e-05
          entropy: 1.3969667377926054
          entropy_coeff: 0.009999999999999998
          kl: 0.006333611800124395
          policy_loss: 0.007289713940450124
          total_loss: 0.2970615019046125
          vf_explained_var: 0.950057864189148
          vf_loss: 0.2941222839057446
    num_agent_steps_sampled: 421578
    num_agent_steps_trained: 421578
    num_steps_sampled: 421578
    num_steps_trained: 421578
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,211,5535.33,421578,8.2382,14.29,2.36,105.67




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 423576
  custom_metrics: {}
  date: 2021-11-09_05-18-11
  done: false
  episode_len_mean: 106.19
  episode_media: {}
  episode_reward_max: 14.010000000000021
  episode_reward_mean: 8.165900000000018
  episode_reward_min: 2.360000000000018
  episodes_this_iter: 19
  episodes_total: 3912
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.000000000000001e-05
          entropy: 1.279895788147336
          entropy_coeff: 0.009999999999999998
          kl: 0.006571204258004715
          policy_loss: 0.03372973388149625
          total_loss: 0.26866457521516296
          vf_explained_var: 0.9428728222846985
          vf_loss: 0.23775378107315018
    num_agent_steps_sampled: 423576
    num_agent_steps_trained: 423576
    num_steps_sampled: 423576
    num_steps_trained: 423576
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,212,5573.23,423576,8.1659,14.01,2.36,106.19




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 425574
  custom_metrics: {}
  date: 2021-11-09_05-18-51
  done: false
  episode_len_mean: 102.45
  episode_media: {}
  episode_reward_max: 14.010000000000021
  episode_reward_mean: 7.749200000000018
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 3933
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.000000000000001e-05
          entropy: 1.3846381136349268
          entropy_coeff: 0.009999999999999998
          kl: 0.010730145900681755
          policy_loss: -0.023783747319664275
          total_loss: 0.48954094457661823
          vf_explained_var: 0.9076771140098572
          vf_loss: 0.5108746661848965
    num_agent_steps_sampled: 425574
    num_agent_steps_trained: 425574
    num_steps_sampled: 425574
    num_steps_trained: 42

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,213,5613.48,425574,7.7492,14.01,-0.06,102.45


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 427572
  custom_metrics: {}
  date: 2021-11-09_05-19-19
  done: false
  episode_len_mean: 101.95
  episode_media: {}
  episode_reward_max: 14.010000000000021
  episode_reward_mean: 7.886900000000018
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 3953
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.000000000000001e-05
          entropy: 1.3264337051482427
          entropy_coeff: 0.009999999999999998
          kl: 0.008819304717652654
          policy_loss: -0.036309508979320525
          total_loss: 0.27472333056142645
          vf_explained_var: 0.9517669677734375
          vf_loss: 0.310902858986741
    num_agent_steps_sampled: 427572
    num_agent_steps_trained: 427572
    num_steps_sampled: 427572
    num_steps_trained: 427

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,214,5641.24,427572,7.8869,14.01,-0.06,101.95


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 429570
  custom_metrics: {}
  date: 2021-11-09_05-19-44
  done: false
  episode_len_mean: 101.84
  episode_media: {}
  episode_reward_max: 13.87000000000002
  episode_reward_mean: 7.677000000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 3973
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.000000000000001e-05
          entropy: 1.363589328243619
          entropy_coeff: 0.009999999999999998
          kl: 0.006669223330982463
          policy_loss: -0.03294741103336925
          total_loss: 0.21575901185472807
          vf_explained_var: 0.9427846074104309
          vf_loss: 0.252213433633248
    num_agent_steps_sampled: 429570
    num_agent_steps_trained: 429570
    num_steps_sampled: 429570
    num_steps_trained: 429570

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,215,5666.3,429570,7.677,13.87,-0.06,101.84


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 431568
  custom_metrics: {}
  date: 2021-11-09_05-20-10
  done: false
  episode_len_mean: 101.01
  episode_media: {}
  episode_reward_max: 13.87000000000002
  episode_reward_mean: 7.617200000000018
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 3992
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.000000000000001e-05
          entropy: 1.4217577270099095
          entropy_coeff: 0.009999999999999998
          kl: 0.0053954993923701665
          policy_loss: -0.042992914219697316
          total_loss: 0.2115221241045566
          vf_explained_var: 0.9416049122810364
          vf_loss: 0.26053820294993263
    num_agent_steps_sampled: 431568
    num_agent_steps_trained: 431568
    num_steps_sampled: 431568
    num_steps_trained: 43

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,216,5692.08,431568,7.6172,13.87,-0.06,101.01


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 433566
  custom_metrics: {}
  date: 2021-11-09_05-20-36
  done: false
  episode_len_mean: 99.86
  episode_media: {}
  episode_reward_max: 13.87000000000002
  episode_reward_mean: 7.339900000000018
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 4011
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 5.000000000000001e-05
          entropy: 1.4008613251504445
          entropy_coeff: 0.009999999999999998
          kl: 0.004759025168157276
          policy_loss: 0.007995230926289445
          total_loss: 0.1412312498582261
          vf_explained_var: 0.9699461460113525
          vf_loss: 0.14001686026652654
    num_agent_steps_sampled: 433566
    num_agent_steps_trained: 433566
    num_steps_sampled: 433566
    num_steps_trained: 43356

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,217,5718.08,433566,7.3399,13.87,-0.06,99.86


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 435564
  custom_metrics: {}
  date: 2021-11-09_05-21-02
  done: false
  episode_len_mean: 101.6
  episode_media: {}
  episode_reward_max: 14.060000000000025
  episode_reward_mean: 7.501900000000018
  episode_reward_min: 1.0600000000000116
  episodes_this_iter: 20
  episodes_total: 4031
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.3988518652461823
          entropy_coeff: 0.009999999999999998
          kl: 0.006671857338335012
          policy_loss: -0.06684733685993013
          total_loss: 0.07227654113833393
          vf_explained_var: 0.9704297780990601
          vf_loss: 0.14804595437433038
    num_agent_steps_sampled: 435564
    num_agent_steps_trained: 435564
    num_steps_sampled: 435564
    num_steps_trained: 435564

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,218,5743.56,435564,7.5019,14.06,1.06,101.6


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 437562
  custom_metrics: {}
  date: 2021-11-09_05-21-27
  done: false
  episode_len_mean: 102.92
  episode_media: {}
  episode_reward_max: 14.060000000000025
  episode_reward_mean: 7.679200000000019
  episode_reward_min: 1.0600000000000116
  episodes_this_iter: 20
  episodes_total: 4051
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.4211179710569837
          entropy_coeff: 0.009999999999999998
          kl: 0.011467997502750411
          policy_loss: -0.028505914534131687
          total_loss: 0.34765895830378646
          vf_explained_var: 0.9252415895462036
          vf_loss: 0.38166753961926414
    num_agent_steps_sampled: 437562
    num_agent_steps_trained: 437562
    num_steps_sampled: 437562
    num_steps_trained: 4375

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,219,5768.68,437562,7.6792,14.06,1.06,102.92


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 439560
  custom_metrics: {}
  date: 2021-11-09_05-21-54
  done: false
  episode_len_mean: 101.77
  episode_media: {}
  episode_reward_max: 14.330000000000018
  episode_reward_mean: 7.552200000000019
  episode_reward_min: 2.8600000000000136
  episodes_this_iter: 20
  episodes_total: 4071
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.4013859953199115
          entropy_coeff: 0.009999999999999998
          kl: 0.00898741161028637
          policy_loss: -0.029470389008167244
          total_loss: 0.22108642886437121
          vf_explained_var: 0.9500879049301147
          vf_loss: 0.2577458624683675
    num_agent_steps_sampled: 439560
    num_agent_steps_trained: 439560
    num_steps_sampled: 439560
    num_steps_trained: 439560

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,220,5795.54,439560,7.5522,14.33,2.86,101.77


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 441558
  custom_metrics: {}
  date: 2021-11-09_05-22-20
  done: false
  episode_len_mean: 101.49
  episode_media: {}
  episode_reward_max: 14.330000000000018
  episode_reward_mean: 7.3808000000000185
  episode_reward_min: 2.8600000000000136
  episodes_this_iter: 20
  episodes_total: 4091
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.3310432121867226
          entropy_coeff: 0.009999999999999998
          kl: 0.008510455512899855
          policy_loss: -0.06406320906465962
          total_loss: 0.1616503149891893
          vf_explained_var: 0.9555668234825134
          vf_loss: 0.2325613283330486
    num_agent_steps_sampled: 441558
    num_agent_steps_trained: 441558
    num_steps_sampled: 441558
    num_steps_trained: 441558

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,221,5821.31,441558,7.3808,14.33,2.86,101.49


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 443556
  custom_metrics: {}
  date: 2021-11-09_05-22-46
  done: false
  episode_len_mean: 100.49
  episode_media: {}
  episode_reward_max: 14.330000000000018
  episode_reward_mean: 7.762300000000018
  episode_reward_min: 2.8600000000000136
  episodes_this_iter: 20
  episodes_total: 4111
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.2999965304420107
          entropy_coeff: 0.009999999999999998
          kl: 0.011542003710446896
          policy_loss: -0.03394757021395933
          total_loss: 0.22280513987477338
          vf_explained_var: 0.957452654838562
          vf_loss: 0.26098796867188956
    num_agent_steps_sampled: 443556
    num_agent_steps_trained: 443556
    num_steps_sampled: 443556
    num_steps_trained: 443556

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,222,5848.1,443556,7.7623,14.33,2.86,100.49


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 445554
  custom_metrics: {}
  date: 2021-11-09_05-23-11
  done: false
  episode_len_mean: 101.4
  episode_media: {}
  episode_reward_max: 14.330000000000018
  episode_reward_mean: 7.94830000000002
  episode_reward_min: 2.8600000000000136
  episodes_this_iter: 19
  episodes_total: 4130
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.3809923438798815
          entropy_coeff: 0.009999999999999998
          kl: 0.0104900175239971
          policy_loss: -0.04855030890376795
          total_loss: 0.1591664365359715
          vf_explained_var: 0.9651190638542175
          vf_loss: 0.2135608088402521
    num_agent_steps_sampled: 445554
    num_agent_steps_trained: 445554
    num_steps_sampled: 445554
    num_steps_trained: 445554
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,223,5872.74,445554,7.9483,14.33,2.86,101.4


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 447552
  custom_metrics: {}
  date: 2021-11-09_05-23-36
  done: false
  episode_len_mean: 100.78
  episode_media: {}
  episode_reward_max: 14.330000000000018
  episode_reward_mean: 7.8831000000000175
  episode_reward_min: 2.7200000000000135
  episodes_this_iter: 19
  episodes_total: 4149
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.3554069388480414
          entropy_coeff: 0.009999999999999998
          kl: 0.010892526932659179
          policy_loss: -0.03421865934949546
          total_loss: 0.17430946570155875
          vf_explained_var: 0.9611020088195801
          vf_loss: 0.21381068415939808
    num_agent_steps_sampled: 447552
    num_agent_steps_trained: 447552
    num_steps_sampled: 447552
    num_steps_trained: 4475

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,224,5898.12,447552,7.8831,14.33,2.72,100.78


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 449550
  custom_metrics: {}
  date: 2021-11-09_05-24-01
  done: false
  episode_len_mean: 102.61
  episode_media: {}
  episode_reward_max: 14.15000000000002
  episode_reward_mean: 8.196200000000019
  episode_reward_min: 2.4300000000000144
  episodes_this_iter: 19
  episodes_total: 4168
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.3528809819902692
          entropy_coeff: 0.009999999999999998
          kl: 0.009393985504883243
          policy_loss: -0.04868191122299149
          total_loss: 0.2139478820243052
          vf_explained_var: 0.9576193690299988
          vf_loss: 0.26902504655576887
    num_agent_steps_sampled: 449550
    num_agent_steps_trained: 449550
    num_steps_sampled: 449550
    num_steps_trained: 449550


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,225,5923.19,449550,8.1962,14.15,2.43,102.61


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 451548
  custom_metrics: {}
  date: 2021-11-09_05-24-27
  done: false
  episode_len_mean: 103.4
  episode_media: {}
  episode_reward_max: 14.15000000000002
  episode_reward_mean: 8.352500000000019
  episode_reward_min: 2.4300000000000144
  episodes_this_iter: 19
  episodes_total: 4187
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.4676977316538493
          entropy_coeff: 0.009999999999999998
          kl: 0.008135077515432885
          policy_loss: -0.03464491812600976
          total_loss: 0.11237639269481102
          vf_explained_var: 0.9695637226104736
          vf_loss: 0.1555207148903892
    num_agent_steps_sampled: 451548
    num_agent_steps_trained: 451548
    num_steps_sampled: 451548
    num_steps_trained: 451548
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,226,5949.13,451548,8.3525,14.15,2.43,103.4


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 453546
  custom_metrics: {}
  date: 2021-11-09_05-24-53
  done: false
  episode_len_mean: 104.62
  episode_media: {}
  episode_reward_max: 14.330000000000018
  episode_reward_mean: 8.526500000000022
  episode_reward_min: 2.4300000000000144
  episodes_this_iter: 19
  episodes_total: 4206
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.351211146513621
          entropy_coeff: 0.009999999999999998
          kl: 0.00979814997300227
          policy_loss: -0.06926338140453611
          total_loss: 0.10444963284369026
          vf_explained_var: 0.977556049823761
          vf_loss: 0.17978465450661524
    num_agent_steps_sampled: 453546
    num_agent_steps_trained: 453546
    num_steps_sampled: 453546
    num_steps_trained: 453546
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,227,5974.13,453546,8.5265,14.33,2.43,104.62


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 455544
  custom_metrics: {}
  date: 2021-11-09_05-25-19
  done: false
  episode_len_mean: 103.87
  episode_media: {}
  episode_reward_max: 14.330000000000018
  episode_reward_mean: 8.457800000000018
  episode_reward_min: 2.4300000000000144
  episodes_this_iter: 20
  episodes_total: 4226
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.3006198201860701
          entropy_coeff: 0.009999999999999998
          kl: 0.01665013260297835
          policy_loss: -0.015487157216384297
          total_loss: 0.33184134440407864
          vf_explained_var: 0.9566037058830261
          vf_loss: 0.34769100417338666
    num_agent_steps_sampled: 455544
    num_agent_steps_trained: 455544
    num_steps_sampled: 455544
    num_steps_trained: 45554

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,228,6000.97,455544,8.4578,14.33,2.43,103.87


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 457542
  custom_metrics: {}
  date: 2021-11-09_05-25-45
  done: false
  episode_len_mean: 104.14
  episode_media: {}
  episode_reward_max: 14.330000000000018
  episode_reward_mean: 8.63120000000002
  episode_reward_min: 2.4300000000000144
  episodes_this_iter: 19
  episodes_total: 4245
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.3141901578222002
          entropy_coeff: 0.009999999999999998
          kl: 0.009688630643943688
          policy_loss: -0.05011291099446161
          total_loss: 0.20088039039678518
          vf_explained_var: 0.9535611271858215
          vf_loss: 0.25677789907370296
    num_agent_steps_sampled: 457542
    num_agent_steps_trained: 457542
    num_steps_sampled: 457542
    num_steps_trained: 457542

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,229,6026.14,457542,8.6312,14.33,2.43,104.14




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 459540
  custom_metrics: {}
  date: 2021-11-09_05-26-26
  done: false
  episode_len_mean: 103.88
  episode_media: {}
  episode_reward_max: 14.330000000000018
  episode_reward_mean: 8.563000000000018
  episode_reward_min: 2.440000000000018
  episodes_this_iter: 19
  episodes_total: 4264
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.3263503687722342
          entropy_coeff: 0.009999999999999998
          kl: 0.009352126929851518
          policy_loss: -0.05917033907913026
          total_loss: 0.17250109161472038
          vf_explained_var: 0.9540281891822815
          vf_loss: 0.2378331607651143
    num_agent_steps_sampled: 459540
    num_agent_steps_trained: 459540
    num_steps_sampled: 459540
    num_steps_trained: 459540


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,230,6067.69,459540,8.563,14.33,2.44,103.88




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 461538
  custom_metrics: {}
  date: 2021-11-09_05-27-09
  done: false
  episode_len_mean: 100.63
  episode_media: {}
  episode_reward_max: 14.330000000000018
  episode_reward_mean: 8.229700000000019
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 22
  episodes_total: 4286
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.3592003436315627
          entropy_coeff: 0.009999999999999998
          kl: 0.012356585775938
          policy_loss: -0.049167872805680544
          total_loss: 0.40760595943956146
          vf_explained_var: 0.919855535030365
          vf_loss: 0.46098254975818453
    num_agent_steps_sampled: 461538
    num_agent_steps_trained: 461538
    num_steps_sampled: 461538
    num_steps_trained: 46153

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,231,6110.7,461538,8.2297,14.33,-0.06,100.63


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 463536
  custom_metrics: {}
  date: 2021-11-09_05-27-38
  done: false
  episode_len_mean: 99.43
  episode_media: {}
  episode_reward_max: 14.410000000000021
  episode_reward_mean: 8.125700000000018
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 4307
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.2686856627464294
          entropy_coeff: 0.009999999999999998
          kl: 0.015850250019343794
          policy_loss: -0.05636326008964153
          total_loss: 0.30983055036160206
          vf_explained_var: 0.9516558051109314
          vf_loss: 0.36684438660740853
    num_agent_steps_sampled: 463536
    num_agent_steps_trained: 463536
    num_steps_sampled: 463536
    num_steps_trained: 463

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,232,6139.07,463536,8.1257,14.41,-0.06,99.43


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 465534
  custom_metrics: {}
  date: 2021-11-09_05-28-04
  done: false
  episode_len_mean: 99.73
  episode_media: {}
  episode_reward_max: 14.410000000000021
  episode_reward_mean: 8.02190000000002
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 4328
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.3933838418551854
          entropy_coeff: 0.009999999999999998
          kl: 0.009699553575118163
          policy_loss: -0.04722070926356883
          total_loss: 0.13555949183979205
          vf_explained_var: 0.9670247435569763
          vf_loss: 0.18934843884337516
    num_agent_steps_sampled: 465534
    num_agent_steps_trained: 465534
    num_steps_sampled: 465534
    num_steps_trained: 4655

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,233,6165.28,465534,8.0219,14.41,-0.06,99.73


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 467532
  custom_metrics: {}
  date: 2021-11-09_05-28-30
  done: false
  episode_len_mean: 97.64
  episode_media: {}
  episode_reward_max: 14.550000000000015
  episode_reward_mean: 8.042900000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 4347
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.2354919439270382
          entropy_coeff: 0.009999999999999998
          kl: 0.00958693695588597
          policy_loss: -0.07151896481712659
          total_loss: 0.21608107717086872
          vf_explained_var: 0.9574047327041626
          vf_loss: 0.2926748777429263
    num_agent_steps_sampled: 467532
    num_agent_steps_trained: 467532
    num_steps_sampled: 467532
    num_steps_trained: 46753

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,234,6191.58,467532,8.0429,14.55,-0.06,97.64


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 469530
  custom_metrics: {}
  date: 2021-11-09_05-28-57
  done: false
  episode_len_mean: 96.77
  episode_media: {}
  episode_reward_max: 14.550000000000015
  episode_reward_mean: 8.303500000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 4368
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.2760256057693844
          entropy_coeff: 0.009999999999999998
          kl: 0.011520639400016451
          policy_loss: -0.016176851146987508
          total_loss: 0.3967879970070152
          vf_explained_var: 0.937372088432312
          vf_loss: 0.41697662061169033
    num_agent_steps_sampled: 469530
    num_agent_steps_trained: 469530
    num_steps_sampled: 469530
    num_steps_trained: 4695

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,235,6218.84,469530,8.3035,14.55,-0.06,96.77


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 471528
  custom_metrics: {}
  date: 2021-11-09_05-29-24
  done: false
  episode_len_mean: 99.88
  episode_media: {}
  episode_reward_max: 14.550000000000015
  episode_reward_mean: 8.516900000000017
  episode_reward_min: 2.860000000000012
  episodes_this_iter: 19
  episodes_total: 4387
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.2308317896865664
          entropy_coeff: 0.009999999999999998
          kl: 0.009751157006611938
          policy_loss: 0.025088711827993392
          total_loss: 0.30013408321177676
          vf_explained_var: 0.9545026421546936
          vf_loss: 0.2799489070971807
    num_agent_steps_sampled: 471528
    num_agent_steps_trained: 471528
    num_steps_sampled: 471528
    num_steps_trained: 471528
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,236,6245.19,471528,8.5169,14.55,2.86,99.88


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 473526
  custom_metrics: {}
  date: 2021-11-09_05-29-50
  done: false
  episode_len_mean: 100.49
  episode_media: {}
  episode_reward_max: 14.550000000000015
  episode_reward_mean: 8.486100000000018
  episode_reward_min: 2.6200000000000148
  episodes_this_iter: 19
  episodes_total: 4406
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.3644591620990207
          entropy_coeff: 0.009999999999999998
          kl: 0.011453841727778948
          policy_loss: 0.0027792493103160746
          total_loss: 0.36577665216865995
          vf_explained_var: 0.9412453174591064
          vf_loss: 0.36794423350975625
    num_agent_steps_sampled: 473526
    num_agent_steps_trained: 473526
    num_steps_sampled: 473526
    num_steps_trained: 4735

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,237,6271.65,473526,8.4861,14.55,2.62,100.49


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 475524
  custom_metrics: {}
  date: 2021-11-09_05-30-16
  done: false
  episode_len_mean: 101.2
  episode_media: {}
  episode_reward_max: 14.550000000000015
  episode_reward_mean: 8.373700000000019
  episode_reward_min: 2.6200000000000148
  episodes_this_iter: 19
  episodes_total: 4425
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.3319811520122347
          entropy_coeff: 0.009999999999999998
          kl: 0.008053056215299718
          policy_loss: 0.009628250565202463
          total_loss: 0.21215450898522423
          vf_explained_var: 0.9438986778259277
          vf_loss: 0.20973078127773034
    num_agent_steps_sampled: 475524
    num_agent_steps_trained: 475524
    num_steps_sampled: 475524
    num_steps_trained: 475524

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,238,6297.5,475524,8.3737,14.55,2.62,101.2


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 477522
  custom_metrics: {}
  date: 2021-11-09_05-30-45
  done: false
  episode_len_mean: 100.21
  episode_media: {}
  episode_reward_max: 14.330000000000021
  episode_reward_mean: 7.998700000000018
  episode_reward_min: 2.6200000000000148
  episodes_this_iter: 22
  episodes_total: 4447
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.3772950984182812
          entropy_coeff: 0.009999999999999998
          kl: 0.00991306893869359
          policy_loss: -0.048162706107610746
          total_loss: 0.21196600819627445
          vf_explained_var: 0.9439867734909058
          vf_loss: 0.26637392668496995
    num_agent_steps_sampled: 477522
    num_agent_steps_trained: 477522
    num_steps_sampled: 477522
    num_steps_trained: 47752

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,239,6325.79,477522,7.9987,14.33,2.62,100.21


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 479520
  custom_metrics: {}
  date: 2021-11-09_05-31-11
  done: false
  episode_len_mean: 99.99
  episode_media: {}
  episode_reward_max: 14.420000000000016
  episode_reward_mean: 7.960100000000019
  episode_reward_min: 2.6200000000000148
  episodes_this_iter: 20
  episodes_total: 4467
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.2658016125361125
          entropy_coeff: 0.009999999999999998
          kl: 0.013379538338512494
          policy_loss: -0.010859778195264794
          total_loss: 0.31417925847428185
          vf_explained_var: 0.9488290548324585
          vf_loss: 0.3275369665097623
    num_agent_steps_sampled: 479520
    num_agent_steps_trained: 479520
    num_steps_sampled: 479520
    num_steps_trained: 479520

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,240,6352.56,479520,7.9601,14.42,2.62,99.99


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 481518
  custom_metrics: {}
  date: 2021-11-09_05-31-38
  done: false
  episode_len_mean: 99.39
  episode_media: {}
  episode_reward_max: 14.420000000000016
  episode_reward_mean: 7.691000000000018
  episode_reward_min: 2.6200000000000148
  episodes_this_iter: 21
  episodes_total: 4488
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.37551178250994
          entropy_coeff: 0.009999999999999998
          kl: 0.00965331928396451
          policy_loss: 0.009095325285480136
          total_loss: 0.19875272406886021
          vf_explained_var: 0.9524602293968201
          vf_loss: 0.19608202887078127
    num_agent_steps_sampled: 481518
    num_agent_steps_trained: 481518
    num_steps_sampled: 481518
    num_steps_trained: 481518
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,241,6379.18,481518,7.691,14.42,2.62,99.39


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 483516
  custom_metrics: {}
  date: 2021-11-09_05-32-05
  done: false
  episode_len_mean: 97.32
  episode_media: {}
  episode_reward_max: 14.460000000000017
  episode_reward_mean: 7.548400000000018
  episode_reward_min: 2.740000000000012
  episodes_this_iter: 21
  episodes_total: 4509
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.312893313453311
          entropy_coeff: 0.009999999999999998
          kl: 0.007684405106716694
          policy_loss: 0.009810957587545827
          total_loss: 0.30030811338552404
          vf_explained_var: 0.9391478300094604
          vf_loss: 0.29779074631986163
    num_agent_steps_sampled: 483516
    num_agent_steps_trained: 483516
    num_steps_sampled: 483516
    num_steps_trained: 483516
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,242,6406.62,483516,7.5484,14.46,2.74,97.32


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 485514
  custom_metrics: {}
  date: 2021-11-09_05-32-31
  done: false
  episode_len_mean: 97.96
  episode_media: {}
  episode_reward_max: 14.460000000000017
  episode_reward_mean: 7.735400000000016
  episode_reward_min: 2.740000000000012
  episodes_this_iter: 18
  episodes_total: 4527
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.2564848911194575
          entropy_coeff: 0.009999999999999998
          kl: 0.009025790051514977
          policy_loss: 0.001582207495257968
          total_loss: 0.2862723180403312
          vf_explained_var: 0.9429119229316711
          vf_loss: 0.2904009947819369
    num_agent_steps_sampled: 485514
    num_agent_steps_trained: 485514
    num_steps_sampled: 485514
    num_steps_trained: 485514
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,243,6432.03,485514,7.7354,14.46,2.74,97.96


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 487512
  custom_metrics: {}
  date: 2021-11-09_05-32-57
  done: false
  episode_len_mean: 99.2
  episode_media: {}
  episode_reward_max: 14.460000000000017
  episode_reward_mean: 7.7810000000000175
  episode_reward_min: 2.740000000000012
  episodes_this_iter: 20
  episodes_total: 4547
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.3137178290457951
          entropy_coeff: 0.009999999999999998
          kl: 0.008841396312447418
          policy_loss: -0.04527643659994716
          total_loss: 0.1583502245490395
          vf_explained_var: 0.9626331329345703
          vf_loss: 0.21004990247033892
    num_agent_steps_sampled: 487512
    num_agent_steps_trained: 487512
    num_steps_sampled: 487512
    num_steps_trained: 487512
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,244,6458.6,487512,7.781,14.46,2.74,99.2


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 489510
  custom_metrics: {}
  date: 2021-11-09_05-33-25
  done: false
  episode_len_mean: 98.28
  episode_media: {}
  episode_reward_max: 14.460000000000017
  episode_reward_mean: 7.393600000000018
  episode_reward_min: 0.8200000000000152
  episodes_this_iter: 22
  episodes_total: 4569
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.3937116656984603
          entropy_coeff: 0.009999999999999998
          kl: 0.009347614638301773
          policy_loss: -0.055021413805938904
          total_loss: 0.1711573252010913
          vf_explained_var: 0.9621946811676025
          vf_loss: 0.23301751115137623
    num_agent_steps_sampled: 489510
    num_agent_steps_trained: 489510
    num_steps_sampled: 489510
    num_steps_trained: 489510

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,245,6486.5,489510,7.3936,14.46,0.82,98.28


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 491508
  custom_metrics: {}
  date: 2021-11-09_05-33-52
  done: false
  episode_len_mean: 97.85
  episode_media: {}
  episode_reward_max: 14.460000000000017
  episode_reward_mean: 7.398300000000018
  episode_reward_min: 0.8200000000000152
  episodes_this_iter: 21
  episodes_total: 4590
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.2729229927062988
          entropy_coeff: 0.009999999999999998
          kl: 0.010360523726161601
          policy_loss: -0.004364546920572009
          total_loss: 0.17923080499860503
          vf_explained_var: 0.9619610905647278
          vf_loss: 0.1884570631952513
    num_agent_steps_sampled: 491508
    num_agent_steps_trained: 491508
    num_steps_sampled: 491508
    num_steps_trained: 491508

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,246,6513.09,491508,7.3983,14.46,0.82,97.85


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 493506
  custom_metrics: {}
  date: 2021-11-09_05-34-19
  done: false
  episode_len_mean: 97.5
  episode_media: {}
  episode_reward_max: 14.350000000000017
  episode_reward_mean: 7.425900000000018
  episode_reward_min: -1.0700000000000007
  episodes_this_iter: 21
  episodes_total: 4611
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.2776444889250256
          entropy_coeff: 0.009999999999999998
          kl: 0.020915391505506174
          policy_loss: -0.0066758146243436
          total_loss: 0.34747019834550363
          vf_explained_var: 0.9442264437675476
          vf_loss: 0.35103983144674983
    num_agent_steps_sampled: 493506
    num_agent_steps_trained: 493506
    num_steps_sampled: 493506
    num_steps_trained: 493506


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,247,6540.05,493506,7.4259,14.35,-1.07,97.5




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 495504
  custom_metrics: {}
  date: 2021-11-09_05-35-19
  done: false
  episode_len_mean: 96.48
  episode_media: {}
  episode_reward_max: 14.350000000000017
  episode_reward_mean: 7.584200000000018
  episode_reward_min: -1.0700000000000007
  episodes_this_iter: 20
  episodes_total: 4631
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.33108183826719
          entropy_coeff: 0.009999999999999998
          kl: 0.009604145509124404
          policy_loss: -0.01356557971310048
          total_loss: 0.40398013322126297
          vf_explained_var: 0.9354663491249084
          vf_loss: 0.4199168104146208
    num_agent_steps_sampled: 495504
    num_agent_steps_trained: 495504
    num_steps_sampled: 495504
    num_steps_trained: 495504
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,248,6600.12,495504,7.5842,14.35,-1.07,96.48




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 497502
  custom_metrics: {}
  date: 2021-11-09_05-36-00
  done: false
  episode_len_mean: 94.72
  episode_media: {}
  episode_reward_max: 14.350000000000017
  episode_reward_mean: 7.723400000000015
  episode_reward_min: -1.0700000000000007
  episodes_this_iter: 22
  episodes_total: 4653
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.2140114602588472
          entropy_coeff: 0.009999999999999998
          kl: 0.008542830660395724
          policy_loss: -0.0033088675744476774
          total_loss: 0.3579194348128069
          vf_explained_var: 0.9497753381729126
          vf_loss: 0.3636375988168376
    num_agent_steps_sampled: 497502
    num_agent_steps_trained: 497502
    num_steps_sampled: 497502
    num_steps_trained: 49750

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,249,6640.92,497502,7.7234,14.35,-1.07,94.72


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 499500
  custom_metrics: {}
  date: 2021-11-09_05-36-27
  done: false
  episode_len_mean: 95.19
  episode_media: {}
  episode_reward_max: 14.350000000000017
  episode_reward_mean: 7.8888000000000185
  episode_reward_min: -1.0700000000000007
  episodes_this_iter: 20
  episodes_total: 4673
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.311941799663362
          entropy_coeff: 0.009999999999999998
          kl: 0.00860439114827798
          policy_loss: -0.041674323344514486
          total_loss: 0.2176923674353886
          vf_explained_var: 0.9414052367210388
          vf_loss: 0.26268517490298976
    num_agent_steps_sampled: 499500
    num_agent_steps_trained: 499500
    num_steps_sampled: 499500
    num_steps_trained: 499500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,250,6667.86,499500,7.8888,14.35,-1.07,95.19


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 501498
  custom_metrics: {}
  date: 2021-11-09_05-36-54
  done: false
  episode_len_mean: 94.78
  episode_media: {}
  episode_reward_max: 14.470000000000017
  episode_reward_mean: 7.931400000000018
  episode_reward_min: -1.0700000000000007
  episodes_this_iter: 22
  episodes_total: 4695
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.2077162348088764
          entropy_coeff: 0.009999999999999998
          kl: 0.005623817787767563
          policy_loss: -0.00265394299335423
          total_loss: 0.10341258696502163
          vf_explained_var: 0.980728268623352
          vf_loss: 0.11173781035911469
    num_agent_steps_sampled: 501498
    num_agent_steps_trained: 501498
    num_steps_sampled: 501498
    num_steps_trained: 501498

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,251,6695.29,501498,7.9314,14.47,-1.07,94.78


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 503496
  custom_metrics: {}
  date: 2021-11-09_05-37-22
  done: false
  episode_len_mean: 94.03
  episode_media: {}
  episode_reward_max: 14.470000000000017
  episode_reward_mean: 7.763700000000016
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 22
  episodes_total: 4717
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.251056679089864
          entropy_coeff: 0.009999999999999998
          kl: 0.007320908418704739
          policy_loss: -0.008171699018705458
          total_loss: 0.16244939815785203
          vf_explained_var: 0.969085693359375
          vf_loss: 0.17479269160401253
    num_agent_steps_sampled: 503496
    num_agent_steps_trained: 503496
    num_steps_sampled: 503496
    num_steps_trained: 5034

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,252,6722.5,503496,7.7637,14.47,-0.06,94.03


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 505494
  custom_metrics: {}
  date: 2021-11-09_05-37-47
  done: false
  episode_len_mean: 94.66
  episode_media: {}
  episode_reward_max: 14.470000000000017
  episode_reward_mean: 7.985400000000016
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 4737
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.2165753336179823
          entropy_coeff: 0.009999999999999998
          kl: 0.007134970254718691
          policy_loss: -0.032475466538398036
          total_loss: 0.13743812241369768
          vf_explained_var: 0.9659351706504822
          vf_loss: 0.1739521651750519
    num_agent_steps_sampled: 505494
    num_agent_steps_trained: 505494
    num_steps_sampled: 505494
    num_steps_trained: 505

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,253,6747.87,505494,7.9854,14.47,-0.06,94.66


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 507492
  custom_metrics: {}
  date: 2021-11-09_05-38-14
  done: false
  episode_len_mean: 95.04
  episode_media: {}
  episode_reward_max: 14.470000000000017
  episode_reward_mean: 8.019000000000018
  episode_reward_min: 3.0500000000000096
  episodes_this_iter: 22
  episodes_total: 4759
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.2346183328401474
          entropy_coeff: 0.009999999999999998
          kl: 0.00752468204720029
          policy_loss: -0.03313832682158266
          total_loss: 0.1409393805389603
          vf_explained_var: 0.9743592143058777
          vf_loss: 0.17785280675050757
    num_agent_steps_sampled: 507492
    num_agent_steps_trained: 507492
    num_steps_sampled: 507492
    num_steps_trained: 507492
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,254,6774.86,507492,8.019,14.47,3.05,95.04


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 509490
  custom_metrics: {}
  date: 2021-11-09_05-38-41
  done: false
  episode_len_mean: 95.65
  episode_media: {}
  episode_reward_max: 14.470000000000017
  episode_reward_mean: 8.355600000000017
  episode_reward_min: 2.960000000000012
  episodes_this_iter: 20
  episodes_total: 4779
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.201776615210942
          entropy_coeff: 0.009999999999999998
          kl: 0.006221061078942059
          policy_loss: -0.08169364711003645
          total_loss: 0.08963065230775447
          vf_explained_var: 0.9699196815490723
          vf_loss: 0.1762558882435163
    num_agent_steps_sampled: 509490
    num_agent_steps_trained: 509490
    num_steps_sampled: 509490
    num_steps_trained: 509490
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,255,6801.5,509490,8.3556,14.47,2.96,95.65


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 511488
  custom_metrics: {}
  date: 2021-11-09_05-39-08
  done: false
  episode_len_mean: 94.46
  episode_media: {}
  episode_reward_max: 14.500000000000016
  episode_reward_mean: 8.233800000000016
  episode_reward_min: 2.960000000000012
  episodes_this_iter: 22
  episodes_total: 4801
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.2335888431185769
          entropy_coeff: 0.009999999999999998
          kl: 0.006932484711453042
          policy_loss: -0.01858941933938435
          total_loss: 0.1478499635167065
          vf_explained_var: 0.9678481221199036
          vf_loss: 0.17087873734888576
    num_agent_steps_sampled: 511488
    num_agent_steps_trained: 511488
    num_steps_sampled: 511488
    num_steps_trained: 511488
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,256,6829.19,511488,8.2338,14.5,2.96,94.46


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 513486
  custom_metrics: {}
  date: 2021-11-09_05-39-37
  done: false
  episode_len_mean: 93.01
  episode_media: {}
  episode_reward_max: 14.500000000000016
  episode_reward_mean: 8.080200000000017
  episode_reward_min: 2.8500000000000143
  episodes_this_iter: 23
  episodes_total: 4824
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.2373416378384545
          entropy_coeff: 0.009999999999999998
          kl: 0.019195758289658988
          policy_loss: -0.014337755650991485
          total_loss: 0.28287019289231724
          vf_explained_var: 0.9582188129425049
          vf_loss: 0.2877161942511087
    num_agent_steps_sampled: 513486
    num_agent_steps_trained: 513486
    num_steps_sampled: 513486
    num_steps_trained: 513486

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,257,6857.58,513486,8.0802,14.5,2.85,93.01


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 515484
  custom_metrics: {}
  date: 2021-11-09_05-40-03
  done: false
  episode_len_mean: 93.56
  episode_media: {}
  episode_reward_max: 14.500000000000016
  episode_reward_mean: 8.231100000000017
  episode_reward_min: 2.8500000000000143
  episodes_this_iter: 19
  episodes_total: 4843
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.2586334120659601
          entropy_coeff: 0.009999999999999998
          kl: 0.007954361681670854
          policy_loss: -0.005713183211073989
          total_loss: 0.3259807447548069
          vf_explained_var: 0.9583756327629089
          vf_loss: 0.3352197439897628
    num_agent_steps_sampled: 515484
    num_agent_steps_trained: 515484
    num_steps_sampled: 515484
    num_steps_trained: 515484


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,258,6883.22,515484,8.2311,14.5,2.85,93.56


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 517482
  custom_metrics: {}
  date: 2021-11-09_05-40-29
  done: false
  episode_len_mean: 94.47
  episode_media: {}
  episode_reward_max: 14.560000000000013
  episode_reward_mean: 8.605000000000016
  episode_reward_min: 2.8500000000000143
  episodes_this_iter: 21
  episodes_total: 4864
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.373492029167357
          entropy_coeff: 0.009999999999999998
          kl: 0.008365407876478981
          policy_loss: -0.04934698183621679
          total_loss: 0.24034727627322788
          vf_explained_var: 0.9655841588973999
          vf_loss: 0.2939004565988268
    num_agent_steps_sampled: 517482
    num_agent_steps_trained: 517482
    num_steps_sampled: 517482
    num_steps_trained: 517482
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,259,6909.5,517482,8.605,14.56,2.85,94.47


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 519480
  custom_metrics: {}
  date: 2021-11-09_05-40-54
  done: false
  episode_len_mean: 96.05
  episode_media: {}
  episode_reward_max: 14.560000000000013
  episode_reward_mean: 8.509900000000018
  episode_reward_min: 2.8500000000000143
  episodes_this_iter: 19
  episodes_total: 4883
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.299329476129441
          entropy_coeff: 0.009999999999999998
          kl: 0.006351050073293135
          policy_loss: -0.03644714254353728
          total_loss: 0.13621217376064687
          vf_explained_var: 0.9675778150558472
          vf_loss: 0.17841836822529633
    num_agent_steps_sampled: 519480
    num_agent_steps_trained: 519480
    num_steps_sampled: 519480
    num_steps_trained: 519480


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,260,6934.21,519480,8.5099,14.56,2.85,96.05


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 521478
  custom_metrics: {}
  date: 2021-11-09_05-41-20
  done: false
  episode_len_mean: 97.59
  episode_media: {}
  episode_reward_max: 14.560000000000013
  episode_reward_mean: 8.605200000000018
  episode_reward_min: 2.7700000000000164
  episodes_this_iter: 19
  episodes_total: 4902
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.3557736362729753
          entropy_coeff: 0.009999999999999998
          kl: 0.007730716328668638
          policy_loss: -0.027694498002529145
          total_loss: 0.18549496272490137
          vf_explained_var: 0.9542096853256226
          vf_loss: 0.21794142715987705
    num_agent_steps_sampled: 521478
    num_agent_steps_trained: 521478
    num_steps_sampled: 521478
    num_steps_trained: 52147

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,261,6960.3,521478,8.6052,14.56,2.77,97.59


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 523476
  custom_metrics: {}
  date: 2021-11-09_05-41-45
  done: false
  episode_len_mean: 100.75
  episode_media: {}
  episode_reward_max: 14.560000000000013
  episode_reward_mean: 8.576700000000018
  episode_reward_min: 2.7700000000000164
  episodes_this_iter: 20
  episodes_total: 4922
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.324438708736783
          entropy_coeff: 0.009999999999999998
          kl: 0.008908318070929917
          policy_loss: -0.01383466592856816
          total_loss: 0.16599574435413594
          vf_explained_var: 0.9656252264976501
          vf_loss: 0.18292766273731276
    num_agent_steps_sampled: 523476
    num_agent_steps_trained: 523476
    num_steps_sampled: 523476
    num_steps_trained: 523476

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,262,6985.72,523476,8.5767,14.56,2.77,100.75


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 525474
  custom_metrics: {}
  date: 2021-11-09_05-42-09
  done: false
  episode_len_mean: 103.64
  episode_media: {}
  episode_reward_max: 14.560000000000013
  episode_reward_mean: 8.603100000000017
  episode_reward_min: 2.7700000000000164
  episodes_this_iter: 18
  episodes_total: 4940
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.321945485614595
          entropy_coeff: 0.009999999999999998
          kl: 0.008017259450419575
          policy_loss: -0.005612489439192272
          total_loss: 0.22193031405054386
          vf_explained_var: 0.9655771851539612
          vf_loss: 0.2316300978263219
    num_agent_steps_sampled: 525474
    num_agent_steps_trained: 525474
    num_steps_sampled: 525474
    num_steps_trained: 525474

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,263,7009.07,525474,8.6031,14.56,2.77,103.64


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 527472
  custom_metrics: {}
  date: 2021-11-09_05-42-32
  done: false
  episode_len_mean: 104.0
  episode_media: {}
  episode_reward_max: 14.560000000000013
  episode_reward_mean: 8.329900000000016
  episode_reward_min: 2.7700000000000164
  episodes_this_iter: 18
  episodes_total: 4958
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.3492082556088765
          entropy_coeff: 0.009999999999999998
          kl: 0.010520564679460283
          policy_loss: -0.05564891525677272
          total_loss: 0.14799309248725573
          vf_explained_var: 0.9732953906059265
          vf_loss: 0.20515050900479156
    num_agent_steps_sampled: 527472
    num_agent_steps_trained: 527472
    num_steps_sampled: 527472
    num_steps_trained: 527472

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,264,7032.95,527472,8.3299,14.56,2.77,104




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 529470
  custom_metrics: {}
  date: 2021-11-09_05-43-15
  done: false
  episode_len_mean: 103.96
  episode_media: {}
  episode_reward_max: 14.310000000000022
  episode_reward_mean: 8.216000000000019
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 4979
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.3025842519033524
          entropy_coeff: 0.009999999999999998
          kl: 0.00853161910509345
          policy_loss: -0.05781550221145153
          total_loss: 0.1147523604156006
          vf_explained_var: 0.9687603116035461
          vf_loss: 0.17587565659057527
    num_agent_steps_sampled: 529470
    num_agent_steps_trained: 529470
    num_steps_sampled: 529470
    num_steps_trained: 5294

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,265,7075.06,529470,8.216,14.31,-0.06,103.96




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 531468
  custom_metrics: {}
  date: 2021-11-09_05-43-55
  done: false
  episode_len_mean: 104.49
  episode_media: {}
  episode_reward_max: 14.310000000000022
  episode_reward_mean: 8.318200000000019
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 4998
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.3395912686983744
          entropy_coeff: 0.009999999999999998
          kl: 0.009621254110589176
          policy_loss: 0.020819386742299512
          total_loss: 0.28603471790750823
          vf_explained_var: 0.9493226408958435
          vf_loss: 0.26765203186798664
    num_agent_steps_sampled: 531468
    num_agent_steps_trained: 531468
    num_steps_sampled: 531468
    num_steps_trained: 53

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,266,7115.81,531468,8.3182,14.31,-0.06,104.49




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 533466
  custom_metrics: {}
  date: 2021-11-09_05-44-35
  done: false
  episode_len_mean: 102.98
  episode_media: {}
  episode_reward_max: 14.310000000000018
  episode_reward_mean: 8.461300000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 5019
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.2704605409077236
          entropy_coeff: 0.009999999999999998
          kl: 0.010875084105203594
          policy_loss: -0.017934736738070136
          total_loss: 0.38608248851128985
          vf_explained_var: 0.9519562721252441
          vf_loss: 0.404334428489563
    num_agent_steps_sampled: 533466
    num_agent_steps_trained: 533466
    num_steps_sampled: 533466
    num_steps_trained: 533

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,267,7155.32,533466,8.4613,14.31,-0.06,102.98


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 535464
  custom_metrics: {}
  date: 2021-11-09_05-45-01
  done: false
  episode_len_mean: 101.02
  episode_media: {}
  episode_reward_max: 14.400000000000018
  episode_reward_mean: 8.462300000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 5039
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.3651482593445552
          entropy_coeff: 0.009999999999999998
          kl: 0.006430833085327575
          policy_loss: -0.007309464312025479
          total_loss: 0.20353894270956516
          vf_explained_var: 0.9703661203384399
          vf_loss: 0.21717477096688179
    num_agent_steps_sampled: 535464
    num_agent_steps_trained: 535464
    num_steps_sampled: 535464
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,268,7181.53,535464,8.4623,14.4,-0.06,101.02


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 537462
  custom_metrics: {}
  date: 2021-11-09_05-45-27
  done: false
  episode_len_mean: 98.82
  episode_media: {}
  episode_reward_max: 14.400000000000018
  episode_reward_mean: 8.168900000000018
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 5060
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.3584645225888208
          entropy_coeff: 0.009999999999999998
          kl: 0.007142646302112576
          policy_loss: -0.03843040363419624
          total_loss: 0.10290088256200154
          vf_explained_var: 0.9797604084014893
          vf_loss: 0.14678001428643864
    num_agent_steps_sampled: 537462
    num_agent_steps_trained: 537462
    num_steps_sampled: 537462
    num_steps_trained: 537

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,269,7206.95,537462,8.1689,14.4,-0.06,98.82


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 539460
  custom_metrics: {}
  date: 2021-11-09_05-45-52
  done: false
  episode_len_mean: 98.77
  episode_media: {}
  episode_reward_max: 14.400000000000018
  episode_reward_mean: 8.186700000000016
  episode_reward_min: 1.95
  episodes_this_iter: 20
  episodes_total: 5080
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.3942857146263123
          entropy_coeff: 0.009999999999999998
          kl: 0.005780420998891385
          policy_loss: -0.01349797465261959
          total_loss: 0.10105707145162991
          vf_explained_var: 0.9682961702346802
          vf_loss: 0.12191364176216579
    num_agent_steps_sampled: 539460
    num_agent_steps_trained: 539460
    num_steps_sampled: 539460
    num_steps_trained: 539460
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,270,7232.61,539460,8.1867,14.4,1.95,98.77


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 541458
  custom_metrics: {}
  date: 2021-11-09_05-46-18
  done: false
  episode_len_mean: 98.71
  episode_media: {}
  episode_reward_max: 14.520000000000016
  episode_reward_mean: 8.270400000000016
  episode_reward_min: 1.95
  episodes_this_iter: 20
  episodes_total: 5100
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.347712444691431
          entropy_coeff: 0.009999999999999998
          kl: 0.015028907867282917
          policy_loss: -0.010271177157050088
          total_loss: 0.34183271162300594
          vf_explained_var: 0.9537121057510376
          vf_loss: 0.34846214710601736
    num_agent_steps_sampled: 541458
    num_agent_steps_trained: 541458
    num_steps_sampled: 541458
    num_steps_trained: 541458
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,271,7258.22,541458,8.2704,14.52,1.95,98.71


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 543456
  custom_metrics: {}
  date: 2021-11-09_05-46-44
  done: false
  episode_len_mean: 99.65
  episode_media: {}
  episode_reward_max: 14.520000000000016
  episode_reward_mean: 8.010500000000018
  episode_reward_min: 0.9899999999999991
  episodes_this_iter: 20
  episodes_total: 5120
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.2576406410762242
          entropy_coeff: 0.009999999999999998
          kl: 0.009921416431400197
          policy_loss: -0.0199783354110661
          total_loss: 0.29823336022950353
          vf_explained_var: 0.9479418992996216
          vf_loss: 0.3194869902162325
    num_agent_steps_sampled: 543456
    num_agent_steps_trained: 543456
    num_steps_sampled: 543456
    num_steps_trained: 543456
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,272,7283.83,543456,8.0105,14.52,0.99,99.65


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 545454
  custom_metrics: {}
  date: 2021-11-09_05-47-10
  done: false
  episode_len_mean: 98.43
  episode_media: {}
  episode_reward_max: 14.520000000000016
  episode_reward_mean: 7.618400000000019
  episode_reward_min: 0.750000000000011
  episodes_this_iter: 20
  episodes_total: 5140
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.3046373038064867
          entropy_coeff: 0.009999999999999998
          kl: 0.00891628440367432
          policy_loss: -0.03617771707829975
          total_loss: 0.24372044259770995
          vf_explained_var: 0.9601364731788635
          vf_loss: 0.2827883302101067
    num_agent_steps_sampled: 545454
    num_agent_steps_trained: 545454
    num_steps_sampled: 545454
    num_steps_trained: 545454
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,273,7310.45,545454,7.6184,14.52,0.75,98.43


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 547452
  custom_metrics: {}
  date: 2021-11-09_05-47-36
  done: false
  episode_len_mean: 98.84
  episode_media: {}
  episode_reward_max: 14.520000000000016
  episode_reward_mean: 7.915400000000017
  episode_reward_min: 0.750000000000011
  episodes_this_iter: 20
  episodes_total: 5160
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.2391177753607432
          entropy_coeff: 0.009999999999999998
          kl: 0.009635923838705382
          policy_loss: -0.017765809169837405
          total_loss: 0.3322959656782803
          vf_explained_var: 0.9504468441009521
          vf_loss: 0.3514770308775561
    num_agent_steps_sampled: 547452
    num_agent_steps_trained: 547452
    num_steps_sampled: 547452
    num_steps_trained: 547452
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,274,7336.33,547452,7.9154,14.52,0.75,98.84


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 549450
  custom_metrics: {}
  date: 2021-11-09_05-48-02
  done: false
  episode_len_mean: 99.04
  episode_media: {}
  episode_reward_max: 14.520000000000016
  episode_reward_mean: 7.8851000000000155
  episode_reward_min: 0.750000000000011
  episodes_this_iter: 21
  episodes_total: 5181
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.350164298784165
          entropy_coeff: 0.009999999999999998
          kl: 0.007067239702283499
          policy_loss: -0.018595119388330552
          total_loss: 0.11966773572688301
          vf_explained_var: 0.9665877819061279
          vf_loss: 0.1437144709307523
    num_agent_steps_sampled: 549450
    num_agent_steps_trained: 549450
    num_steps_sampled: 549450
    num_steps_trained: 549450


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,275,7361.87,549450,7.8851,14.52,0.75,99.04


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 551448
  custom_metrics: {}
  date: 2021-11-09_05-48-27
  done: false
  episode_len_mean: 99.43
  episode_media: {}
  episode_reward_max: 14.390000000000018
  episode_reward_mean: 8.064300000000019
  episode_reward_min: 0.750000000000011
  episodes_this_iter: 19
  episodes_total: 5200
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.2418187334423973
          entropy_coeff: 0.009999999999999998
          kl: 0.007271343476332519
          policy_loss: -0.04159718286246061
          total_loss: 0.12758217748431933
          vf_explained_var: 0.9800394773483276
          vf_loss: 0.17331503409714927
    num_agent_steps_sampled: 551448
    num_agent_steps_trained: 551448
    num_steps_sampled: 551448
    num_steps_trained: 551448


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,276,7386.7,551448,8.0643,14.39,0.75,99.43


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 553446
  custom_metrics: {}
  date: 2021-11-09_05-48-53
  done: false
  episode_len_mean: 99.05
  episode_media: {}
  episode_reward_max: 14.390000000000018
  episode_reward_mean: 8.184400000000018
  episode_reward_min: 0.750000000000011
  episodes_this_iter: 20
  episodes_total: 5220
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.2648623880885896
          entropy_coeff: 0.009999999999999998
          kl: 0.007004350442294734
          policy_loss: 0.020572803249316558
          total_loss: 0.20460542722472122
          vf_explained_var: 0.9666376113891602
          vf_loss: 0.18870285398193767
    num_agent_steps_sampled: 553446
    num_agent_steps_trained: 553446
    num_steps_sampled: 553446
    num_steps_trained: 553446


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,277,7412.92,553446,8.1844,14.39,0.75,99.05


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 555444
  custom_metrics: {}
  date: 2021-11-09_05-49-18
  done: false
  episode_len_mean: 100.08
  episode_media: {}
  episode_reward_max: 14.390000000000018
  episode_reward_mean: 8.190000000000019
  episode_reward_min: 2.6400000000000152
  episodes_this_iter: 21
  episodes_total: 5241
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.2365763235659826
          entropy_coeff: 0.009999999999999998
          kl: 0.010255690231559225
          policy_loss: -0.008064306527376174
          total_loss: 0.4085118901073223
          vf_explained_var: 0.9545692205429077
          vf_loss: 0.4172600850108124
    num_agent_steps_sampled: 555444
    num_agent_steps_trained: 555444
    num_steps_sampled: 555444
    num_steps_trained: 555444

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,278,7437.74,555444,8.19,14.39,2.64,100.08


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 557442
  custom_metrics: {}
  date: 2021-11-09_05-49-44
  done: false
  episode_len_mean: 99.72
  episode_media: {}
  episode_reward_max: 14.390000000000018
  episode_reward_mean: 8.512600000000019
  episode_reward_min: 2.6400000000000152
  episodes_this_iter: 20
  episodes_total: 5261
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.0999234900588082
          entropy_coeff: 0.009999999999999998
          kl: 0.011027628630790787
          policy_loss: -0.025527179644753536
          total_loss: 0.30209433294477916
          vf_explained_var: 0.9548017382621765
          vf_loss: 0.32605958964143483
    num_agent_steps_sampled: 557442
    num_agent_steps_trained: 557442
    num_steps_sampled: 557442
    num_steps_trained: 55744

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,279,7464.17,557442,8.5126,14.39,2.64,99.72


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 559440
  custom_metrics: {}
  date: 2021-11-09_05-50-09
  done: false
  episode_len_mean: 100.75
  episode_media: {}
  episode_reward_max: 14.390000000000018
  episode_reward_mean: 8.69430000000002
  episode_reward_min: 2.4400000000000204
  episodes_this_iter: 20
  episodes_total: 5281
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.3159865657488505
          entropy_coeff: 0.009999999999999998
          kl: 0.008408852002224338
          policy_loss: -0.0075203320100193935
          total_loss: 0.1485539138760595
          vf_explained_var: 0.9663788080215454
          vf_loss: 0.15965590115104403
    num_agent_steps_sampled: 559440
    num_agent_steps_trained: 559440
    num_steps_sampled: 559440
    num_steps_trained: 55944

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,280,7488.72,559440,8.6943,14.39,2.44,100.75


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 561438
  custom_metrics: {}
  date: 2021-11-09_05-50-37
  done: false
  episode_len_mean: 99.2
  episode_media: {}
  episode_reward_max: 14.090000000000021
  episode_reward_mean: 8.101600000000017
  episode_reward_min: 2.4400000000000204
  episodes_this_iter: 20
  episodes_total: 5301
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.3142689880870637
          entropy_coeff: 0.009999999999999998
          kl: 0.008144936499045942
          policy_loss: -0.03095206847148282
          total_loss: 0.10969426667406446
          vf_explained_var: 0.9645714163780212
          vf_loss: 0.14451143317634152
    num_agent_steps_sampled: 561438
    num_agent_steps_trained: 561438
    num_steps_sampled: 561438
    num_steps_trained: 561438


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,281,7517.32,561438,8.1016,14.09,2.44,99.2


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 563436
  custom_metrics: {}
  date: 2021-11-09_05-51-03
  done: false
  episode_len_mean: 98.78
  episode_media: {}
  episode_reward_max: 14.090000000000021
  episode_reward_mean: 8.15710000000002
  episode_reward_min: 2.4400000000000204
  episodes_this_iter: 21
  episodes_total: 5322
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.2801825103305635
          entropy_coeff: 0.009999999999999998
          kl: 0.005386191184887459
          policy_loss: -0.01700172344488757
          total_loss: 0.0879565677649918
          vf_explained_var: 0.9803221225738525
          vf_loss: 0.11162490782638392
    num_agent_steps_sampled: 563436
    num_agent_steps_trained: 563436
    num_steps_sampled: 563436
    num_steps_trained: 563436
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,282,7543.08,563436,8.1571,14.09,2.44,98.78




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 565434
  custom_metrics: {}
  date: 2021-11-09_05-51-46
  done: false
  episode_len_mean: 96.96
  episode_media: {}
  episode_reward_max: 14.460000000000017
  episode_reward_mean: 8.147300000000019
  episode_reward_min: -0.02
  episodes_this_iter: 21
  episodes_total: 5343
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.2712002424966722
          entropy_coeff: 0.009999999999999998
          kl: 0.011082124757426056
          policy_loss: -0.00986209340570938
          total_loss: 0.44716332590296154
          vf_explained_var: 0.9388816952705383
          vf_loss: 0.45711418890527317
    num_agent_steps_sampled: 565434
    num_agent_steps_trained: 565434
    num_steps_sampled: 565434
    num_steps_trained: 565434
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,283,7585.79,565434,8.1473,14.46,-0.02,96.96




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 567432
  custom_metrics: {}
  date: 2021-11-09_05-52-40
  done: false
  episode_len_mean: 96.38
  episode_media: {}
  episode_reward_max: 14.460000000000017
  episode_reward_mean: 7.970800000000017
  episode_reward_min: -0.04
  episodes_this_iter: 22
  episodes_total: 5365
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.200730836391449
          entropy_coeff: 0.009999999999999998
          kl: 0.007979547035886709
          policy_loss: -0.06569960417137259
          total_loss: 0.25469672373895136
          vf_explained_var: 0.9527625441551208
          vf_loss: 0.32331443244502656
    num_agent_steps_sampled: 567432
    num_agent_steps_trained: 567432
    num_steps_sampled: 567432
    num_steps_trained: 567432
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,284,7640.04,567432,7.9708,14.46,-0.04,96.38


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 569430
  custom_metrics: {}
  date: 2021-11-09_05-53-09
  done: false
  episode_len_mean: 94.42
  episode_media: {}
  episode_reward_max: 14.510000000000016
  episode_reward_mean: 8.477400000000015
  episode_reward_min: -0.04
  episodes_this_iter: 21
  episodes_total: 5386
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.1853216279120673
          entropy_coeff: 0.009999999999999998
          kl: 0.00768616612941481
          policy_loss: -0.03188261107674667
          total_loss: 0.20324600296361106
          vf_explained_var: 0.9723497033119202
          vf_loss: 0.23822680825278872
    num_agent_steps_sampled: 569430
    num_agent_steps_trained: 569430
    num_steps_sampled: 569430
    num_steps_trained: 569430
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,285,7668.79,569430,8.4774,14.51,-0.04,94.42


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 571428
  custom_metrics: {}
  date: 2021-11-09_05-53-35
  done: false
  episode_len_mean: 93.84
  episode_media: {}
  episode_reward_max: 14.510000000000016
  episode_reward_mean: 8.644100000000018
  episode_reward_min: -0.04
  episodes_this_iter: 21
  episodes_total: 5407
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.2244437546957108
          entropy_coeff: 0.009999999999999998
          kl: 0.006295582002255846
          policy_loss: -0.016915357059666087
          total_loss: 0.15630102512382327
          vf_explained_var: 0.9623178839683533
          vf_loss: 0.17828975616111642
    num_agent_steps_sampled: 571428
    num_agent_steps_trained: 571428
    num_steps_sampled: 571428
    num_steps_trained: 571428
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,286,7694.23,571428,8.6441,14.51,-0.04,93.84


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 573426
  custom_metrics: {}
  date: 2021-11-09_05-54-01
  done: false
  episode_len_mean: 94.19
  episode_media: {}
  episode_reward_max: 14.510000000000016
  episode_reward_mean: 8.629200000000017
  episode_reward_min: -0.04
  episodes_this_iter: 21
  episodes_total: 5428
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.2593601851236254
          entropy_coeff: 0.009999999999999998
          kl: 0.008324126597476661
          policy_loss: -0.05808073553434085
          total_loss: 0.12334849494509399
          vf_explained_var: 0.9697974920272827
          vf_loss: 0.18454112944503626
    num_agent_steps_sampled: 573426
    num_agent_steps_trained: 573426
    num_steps_sampled: 573426
    num_steps_trained: 573426
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,287,7720.52,573426,8.6292,14.51,-0.04,94.19


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 575424
  custom_metrics: {}
  date: 2021-11-09_05-54-26
  done: false
  episode_len_mean: 96.75
  episode_media: {}
  episode_reward_max: 14.510000000000016
  episode_reward_mean: 8.761100000000017
  episode_reward_min: 2.9900000000000158
  episodes_this_iter: 19
  episodes_total: 5447
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.398820716426486
          entropy_coeff: 0.009999999999999998
          kl: 0.008269730288295273
          policy_loss: -0.038262570117201126
          total_loss: 0.10276588225471121
          vf_explained_var: 0.9749879240989685
          vf_loss: 0.1455969183572701
    num_agent_steps_sampled: 575424
    num_agent_steps_trained: 575424
    num_steps_sampled: 575424
    num_steps_trained: 575424


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,288,7745.93,575424,8.7611,14.51,2.99,96.75


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 577422
  custom_metrics: {}
  date: 2021-11-09_05-54-52
  done: false
  episode_len_mean: 96.88
  episode_media: {}
  episode_reward_max: 14.510000000000016
  episode_reward_mean: 8.691500000000017
  episode_reward_min: 2.7800000000000162
  episodes_this_iter: 21
  episodes_total: 5468
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.284337324187869
          entropy_coeff: 0.009999999999999998
          kl: 0.007858622106661519
          policy_loss: -0.021171922910781134
          total_loss: 0.18869891886909804
          vf_explained_var: 0.9696707129478455
          vf_loss: 0.2137627525698571
    num_agent_steps_sampled: 577422
    num_agent_steps_trained: 577422
    num_steps_sampled: 577422
    num_steps_trained: 577422


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,289,7772.07,577422,8.6915,14.51,2.78,96.88


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 579420
  custom_metrics: {}
  date: 2021-11-09_05-55-19
  done: false
  episode_len_mean: 97.2
  episode_media: {}
  episode_reward_max: 14.460000000000017
  episode_reward_mean: 8.399200000000018
  episode_reward_min: 2.7800000000000162
  episodes_this_iter: 21
  episodes_total: 5489
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.4058173565637497
          entropy_coeff: 0.009999999999999998
          kl: 0.006272578808726675
          policy_loss: -0.09782600601514181
          total_loss: 0.09140950141563302
          vf_explained_var: 0.9693292379379272
          vf_loss: 0.19614882192441396
    num_agent_steps_sampled: 579420
    num_agent_steps_trained: 579420
    num_steps_sampled: 579420
    num_steps_trained: 579420


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,290,7798.42,579420,8.3992,14.46,2.78,97.2


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 581418
  custom_metrics: {}
  date: 2021-11-09_05-55-44
  done: false
  episode_len_mean: 97.66
  episode_media: {}
  episode_reward_max: 14.460000000000017
  episode_reward_mean: 8.489000000000019
  episode_reward_min: 2.7800000000000162
  episodes_this_iter: 20
  episodes_total: 5509
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.2882004976272583
          entropy_coeff: 0.009999999999999998
          kl: 0.010453058933121895
          policy_loss: -0.015470736207706587
          total_loss: 0.299636415338942
          vf_explained_var: 0.9508455395698547
          vf_loss: 0.31608246728068307
    num_agent_steps_sampled: 581418
    num_agent_steps_trained: 581418
    num_steps_sampled: 581418
    num_steps_trained: 581418


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,291,7823.85,581418,8.489,14.46,2.78,97.66


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 583416
  custom_metrics: {}
  date: 2021-11-09_05-56-11
  done: false
  episode_len_mean: 97.42
  episode_media: {}
  episode_reward_max: 14.460000000000017
  episode_reward_mean: 8.244600000000018
  episode_reward_min: 2.640000000000016
  episodes_this_iter: 21
  episodes_total: 5530
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.4601934603282383
          entropy_coeff: 0.009999999999999998
          kl: 0.006969860651864826
          policy_loss: -0.055094072648457115
          total_loss: 0.08236929935713609
          vf_explained_var: 0.9693991541862488
          vf_loss: 0.14412620106623286
    num_agent_steps_sampled: 583416
    num_agent_steps_trained: 583416
    num_steps_sampled: 583416
    num_steps_trained: 583416

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,292,7850.92,583416,8.2446,14.46,2.64,97.42


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 585414
  custom_metrics: {}
  date: 2021-11-09_05-56-37
  done: false
  episode_len_mean: 98.01
  episode_media: {}
  episode_reward_max: 14.470000000000018
  episode_reward_mean: 8.396200000000018
  episode_reward_min: 2.640000000000016
  episodes_this_iter: 19
  episodes_total: 5549
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.3631144756362552
          entropy_coeff: 0.009999999999999998
          kl: 0.007622601017845728
          policy_loss: -0.0448221544247298
          total_loss: 0.08301185704767704
          vf_explained_var: 0.984360933303833
          vf_loss: 0.13278253690472672
    num_agent_steps_sampled: 585414
    num_agent_steps_trained: 585414
    num_steps_sampled: 585414
    num_steps_trained: 585414
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,293,7876.46,585414,8.3962,14.47,2.64,98.01


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 587412
  custom_metrics: {}
  date: 2021-11-09_05-57-04
  done: false
  episode_len_mean: 96.79
  episode_media: {}
  episode_reward_max: 14.530000000000015
  episode_reward_mean: 8.378700000000018
  episode_reward_min: 2.640000000000016
  episodes_this_iter: 22
  episodes_total: 5571
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.320798261676516
          entropy_coeff: 0.009999999999999998
          kl: 0.00956619961356803
          policy_loss: 0.018620175921491216
          total_loss: 0.2608728279049198
          vf_explained_var: 0.9676933884620667
          vf_loss: 0.24456413500010968
    num_agent_steps_sampled: 587412
    num_agent_steps_trained: 587412
    num_steps_sampled: 587412
    num_steps_trained: 587412
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,294,7903.63,587412,8.3787,14.53,2.64,96.79


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 589410
  custom_metrics: {}
  date: 2021-11-09_05-57-29
  done: false
  episode_len_mean: 97.24
  episode_media: {}
  episode_reward_max: 14.530000000000015
  episode_reward_mean: 8.138100000000017
  episode_reward_min: 2.640000000000016
  episodes_this_iter: 20
  episodes_total: 5591
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.347116460118975
          entropy_coeff: 0.009999999999999998
          kl: 0.00766590816080926
          policy_loss: -0.039752814377702415
          total_loss: 0.10631605736201717
          vf_explained_var: 0.9587530493736267
          vf_loss: 0.15080808764767079
    num_agent_steps_sampled: 589410
    num_agent_steps_trained: 589410
    num_steps_sampled: 589410
    num_steps_trained: 589410
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,295,7928.93,589410,8.1381,14.53,2.64,97.24


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 591408
  custom_metrics: {}
  date: 2021-11-09_05-57-57
  done: false
  episode_len_mean: 96.11
  episode_media: {}
  episode_reward_max: 14.530000000000015
  episode_reward_mean: 7.9895000000000165
  episode_reward_min: 2.640000000000016
  episodes_this_iter: 22
  episodes_total: 5613
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.3583972964968
          entropy_coeff: 0.009999999999999998
          kl: 0.011875553118216314
          policy_loss: -0.0069321321234816595
          total_loss: 0.24213541585597254
          vf_explained_var: 0.96843421459198
          vf_loss: 0.24912452500845705
    num_agent_steps_sampled: 591408
    num_agent_steps_trained: 591408
    num_steps_sampled: 591408
    num_steps_trained: 591408
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,296,7956.36,591408,7.9895,14.53,2.64,96.11


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 593406
  custom_metrics: {}
  date: 2021-11-09_05-58-23
  done: false
  episode_len_mean: 96.81
  episode_media: {}
  episode_reward_max: 14.530000000000015
  episode_reward_mean: 8.081100000000017
  episode_reward_min: 2.550000000000016
  episodes_this_iter: 20
  episodes_total: 5633
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.3109973527136303
          entropy_coeff: 0.009999999999999998
          kl: 0.008568122291453185
          policy_loss: -0.03191277918716272
          total_loss: 0.17020867733018738
          vf_explained_var: 0.96840500831604
          vf_loss: 0.20547180211260205
    num_agent_steps_sampled: 593406
    num_agent_steps_trained: 593406
    num_steps_sampled: 593406
    num_steps_trained: 593406
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,297,7982.79,593406,8.0811,14.53,2.55,96.81


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 595404
  custom_metrics: {}
  date: 2021-11-09_05-58-49
  done: false
  episode_len_mean: 96.73
  episode_media: {}
  episode_reward_max: 14.500000000000018
  episode_reward_mean: 7.970100000000018
  episode_reward_min: 2.550000000000016
  episodes_this_iter: 20
  episodes_total: 5653
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.1316296529202234
          entropy_coeff: 0.009999999999999998
          kl: 0.006088562164106394
          policy_loss: 0.0002647356085834049
          total_loss: 0.16325493825688248
          vf_explained_var: 0.969072699546814
          vf_loss: 0.16737124589937075
    num_agent_steps_sampled: 595404
    num_agent_steps_trained: 595404
    num_steps_sampled: 595404
    num_steps_trained: 595404


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,298,8008.06,595404,7.9701,14.5,2.55,96.73


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 597402
  custom_metrics: {}
  date: 2021-11-09_05-59-15
  done: false
  episode_len_mean: 97.52
  episode_media: {}
  episode_reward_max: 14.490000000000016
  episode_reward_mean: 7.673000000000018
  episode_reward_min: 2.550000000000016
  episodes_this_iter: 20
  episodes_total: 5673
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.365710855665661
          entropy_coeff: 0.009999999999999998
          kl: 0.006701762839933385
          policy_loss: 0.0031870343678054355
          total_loss: 0.13052367867813225
          vf_explained_var: 0.9728474020957947
          vf_loss: 0.13336002649295897
    num_agent_steps_sampled: 597402
    num_agent_steps_trained: 597402
    num_steps_sampled: 597402
    num_steps_trained: 597402


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,299,8034.5,597402,7.673,14.49,2.55,97.52




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 599400
  custom_metrics: {}
  date: 2021-11-09_05-59-56
  done: false
  episode_len_mean: 96.21
  episode_media: {}
  episode_reward_max: 14.490000000000016
  episode_reward_mean: 7.885200000000018
  episode_reward_min: -0.9300000000000004
  episodes_this_iter: 22
  episodes_total: 5695
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.3499255855878194
          entropy_coeff: 0.009999999999999998
          kl: 0.010289448945707238
          policy_loss: -0.0030843831953548248
          total_loss: 0.2606536429818897
          vf_explained_var: 0.9488939642906189
          vf_loss: 0.2655169583324875
    num_agent_steps_sampled: 599400
    num_agent_steps_trained: 599400
    num_steps_sampled: 599400
    num_steps_trained: 59940

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,300,8074.9,599400,7.8852,14.49,-0.93,96.21




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 601398
  custom_metrics: {}
  date: 2021-11-09_06-00-50
  done: false
  episode_len_mean: 94.58
  episode_media: {}
  episode_reward_max: 14.470000000000017
  episode_reward_mean: 7.721000000000016
  episode_reward_min: -0.9300000000000004
  episodes_this_iter: 23
  episodes_total: 5718
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.2433589322226388
          entropy_coeff: 0.009999999999999998
          kl: 0.011807381576731554
          policy_loss: -0.026393378854152702
          total_loss: 0.29181354292890144
          vf_explained_var: 0.9526144862174988
          vf_loss: 0.3171911659694853
    num_agent_steps_sampled: 601398
    num_agent_steps_trained: 601398
    num_steps_sampled: 601398
    num_steps_trained: 60139

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,301,8128.92,601398,7.721,14.47,-0.93,94.58


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 603396
  custom_metrics: {}
  date: 2021-11-09_06-01-16
  done: false
  episode_len_mean: 94.77
  episode_media: {}
  episode_reward_max: 14.470000000000017
  episode_reward_mean: 8.119200000000015
  episode_reward_min: -0.9300000000000004
  episodes_this_iter: 20
  episodes_total: 5738
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.2416561035882858
          entropy_coeff: 0.009999999999999998
          kl: 0.010914254311238034
          policy_loss: -0.0915341990334647
          total_loss: 0.13482485819785367
          vf_explained_var: 0.9675099849700928
          vf_loss: 0.2263436009841306
    num_agent_steps_sampled: 603396
    num_agent_steps_trained: 603396
    num_steps_sampled: 603396
    num_steps_trained: 603396


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,302,8155.48,603396,8.1192,14.47,-0.93,94.77


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 605394
  custom_metrics: {}
  date: 2021-11-09_06-01-42
  done: false
  episode_len_mean: 95.2
  episode_media: {}
  episode_reward_max: 14.480000000000015
  episode_reward_mean: 8.529000000000016
  episode_reward_min: -0.9300000000000004
  episodes_this_iter: 19
  episodes_total: 5757
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.2639827762331282
          entropy_coeff: 0.009999999999999998
          kl: 0.00971011580614214
          policy_loss: -0.022461367664592606
          total_loss: 0.2427133953287488
          vf_explained_var: 0.9648404121398926
          vf_loss: 0.26675416016507714
    num_agent_steps_sampled: 605394
    num_agent_steps_trained: 605394
    num_steps_sampled: 605394
    num_steps_trained: 605394


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,303,8181.32,605394,8.529,14.48,-0.93,95.2


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 607392
  custom_metrics: {}
  date: 2021-11-09_06-02-08
  done: false
  episode_len_mean: 94.92
  episode_media: {}
  episode_reward_max: 14.600000000000016
  episode_reward_mean: 8.798900000000017
  episode_reward_min: -0.9300000000000004
  episodes_this_iter: 21
  episodes_total: 5778
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.2405522485574088
          entropy_coeff: 0.009999999999999998
          kl: 0.007136468112908061
          policy_loss: -0.029144861762012753
          total_loss: 0.15495562636781307
          vf_explained_var: 0.9704666137695312
          vf_loss: 0.18837712758353778
    num_agent_steps_sampled: 607392
    num_agent_steps_trained: 607392
    num_steps_sampled: 607392
    num_steps_trained: 6073

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,304,8207.06,607392,8.7989,14.6,-0.93,94.92


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 609390
  custom_metrics: {}
  date: 2021-11-09_06-02-33
  done: false
  episode_len_mean: 97.48
  episode_media: {}
  episode_reward_max: 14.600000000000016
  episode_reward_mean: 8.894300000000017
  episode_reward_min: -0.07
  episodes_this_iter: 20
  episodes_total: 5798
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.3346087847437178
          entropy_coeff: 0.009999999999999998
          kl: 0.00763379862246083
          policy_loss: -0.03561401014171896
          total_loss: 0.1433802787569307
          vf_explained_var: 0.9805277585983276
          vf_loss: 0.18364500069015083
    num_agent_steps_sampled: 609390
    num_agent_steps_trained: 609390
    num_steps_sampled: 609390
    num_steps_trained: 609390
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,305,8231.93,609390,8.8943,14.6,-0.07,97.48


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 611388
  custom_metrics: {}
  date: 2021-11-09_06-02-59
  done: false
  episode_len_mean: 99.44
  episode_media: {}
  episode_reward_max: 14.600000000000016
  episode_reward_mean: 9.138600000000018
  episode_reward_min: 2.4700000000000197
  episodes_this_iter: 20
  episodes_total: 5818
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.3177657615570795
          entropy_coeff: 0.009999999999999998
          kl: 0.006817041483881586
          policy_loss: -0.030266658411849112
          total_loss: 0.10329165637847923
          vf_explained_var: 0.9756495952606201
          vf_loss: 0.13897093694124904
    num_agent_steps_sampled: 611388
    num_agent_steps_trained: 611388
    num_steps_sampled: 611388
    num_steps_trained: 61138

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,306,8257.65,611388,9.1386,14.6,2.47,99.44


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 613386
  custom_metrics: {}
  date: 2021-11-09_06-03-22
  done: false
  episode_len_mean: 101.27
  episode_media: {}
  episode_reward_max: 14.600000000000016
  episode_reward_mean: 8.985200000000019
  episode_reward_min: 2.3000000000000274
  episodes_this_iter: 18
  episodes_total: 5836
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.297631938116891
          entropy_coeff: 0.009999999999999998
          kl: 0.007042905657675309
          policy_loss: -0.009263151422852561
          total_loss: 0.12301412625681786
          vf_explained_var: 0.9844309687614441
          vf_loss: 0.13723128670383067
    num_agent_steps_sampled: 613386
    num_agent_steps_trained: 613386
    num_steps_sampled: 613386
    num_steps_trained: 61338

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,307,8280.69,613386,8.9852,14.6,2.3,101.27


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 615384
  custom_metrics: {}
  date: 2021-11-09_06-03-47
  done: false
  episode_len_mean: 101.6
  episode_media: {}
  episode_reward_max: 14.600000000000016
  episode_reward_mean: 8.937000000000019
  episode_reward_min: 2.3000000000000274
  episodes_this_iter: 20
  episodes_total: 5856
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.3245651114554633
          entropy_coeff: 0.009999999999999998
          kl: 0.0095174603263617
          policy_loss: -0.031058060980978467
          total_loss: 0.18529041942563795
          vf_explained_var: 0.9763503074645996
          vf_loss: 0.21875314648662295
    num_agent_steps_sampled: 615384
    num_agent_steps_trained: 615384
    num_steps_sampled: 615384
    num_steps_trained: 615384


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,308,8306.11,615384,8.937,14.6,2.3,101.6


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 617382
  custom_metrics: {}
  date: 2021-11-09_06-04-12
  done: false
  episode_len_mean: 101.76
  episode_media: {}
  episode_reward_max: 14.600000000000016
  episode_reward_mean: 8.699500000000018
  episode_reward_min: 2.3000000000000274
  episodes_this_iter: 21
  episodes_total: 5877
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.2807923873265585
          entropy_coeff: 0.009999999999999998
          kl: 0.008340895499492499
          policy_loss: -0.08425914081079619
          total_loss: 0.1894460071366103
          vf_explained_var: 0.9541444778442383
          vf_loss: 0.2770122707244896
    num_agent_steps_sampled: 617382
    num_agent_steps_trained: 617382
    num_steps_sampled: 617382
    num_steps_trained: 617382


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,309,8331.27,617382,8.6995,14.6,2.3,101.76


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 619380
  custom_metrics: {}
  date: 2021-11-09_06-04-36
  done: false
  episode_len_mean: 102.4
  episode_media: {}
  episode_reward_max: 14.510000000000016
  episode_reward_mean: 8.406500000000019
  episode_reward_min: 2.3000000000000274
  episodes_this_iter: 18
  episodes_total: 5895
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.3129312906946455
          entropy_coeff: 0.009999999999999998
          kl: 0.011068693406404598
          policy_loss: -0.0019634832051538286
          total_loss: 0.33462216612838563
          vf_explained_var: 0.9485781788825989
          vf_loss: 0.3371070343646265
    num_agent_steps_sampled: 619380
    num_agent_steps_trained: 619380
    num_steps_sampled: 619380
    num_steps_trained: 61938

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,310,8355.41,619380,8.4065,14.51,2.3,102.4


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 621378
  custom_metrics: {}
  date: 2021-11-09_06-05-02
  done: false
  episode_len_mean: 103.32
  episode_media: {}
  episode_reward_max: 14.560000000000015
  episode_reward_mean: 8.539700000000018
  episode_reward_min: 2.1400000000000223
  episodes_this_iter: 20
  episodes_total: 5915
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.2634616936956133
          entropy_coeff: 0.009999999999999998
          kl: 0.00757920542650492
          policy_loss: -0.040289596654474735
          total_loss: 0.1518294477037021
          vf_explained_var: 0.9709740281105042
          vf_loss: 0.19612046827872595
    num_agent_steps_sampled: 621378
    num_agent_steps_trained: 621378
    num_steps_sampled: 621378
    num_steps_trained: 621378

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,311,8380.85,621378,8.5397,14.56,2.14,103.32


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 623376
  custom_metrics: {}
  date: 2021-11-09_06-05-26
  done: false
  episode_len_mean: 101.8
  episode_media: {}
  episode_reward_max: 14.560000000000015
  episode_reward_mean: 8.309300000000018
  episode_reward_min: 2.1400000000000223
  episodes_this_iter: 20
  episodes_total: 5935
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.3202674984931946
          entropy_coeff: 0.009999999999999998
          kl: 0.009725692704796968
          policy_loss: -0.025309374786558606
          total_loss: 0.1677281198535292
          vf_explained_var: 0.9688036441802979
          vf_loss: 0.1951619993363108
    num_agent_steps_sampled: 623376
    num_agent_steps_trained: 623376
    num_steps_sampled: 623376
    num_steps_trained: 623376


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,312,8405.22,623376,8.3093,14.56,2.14,101.8


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 625374
  custom_metrics: {}
  date: 2021-11-09_06-05-52
  done: false
  episode_len_mean: 101.66
  episode_media: {}
  episode_reward_max: 14.560000000000015
  episode_reward_mean: 7.861100000000017
  episode_reward_min: 2.1400000000000223
  episodes_this_iter: 19
  episodes_total: 5954
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.3729094715345473
          entropy_coeff: 0.009999999999999998
          kl: 0.0071089508887110095
          policy_loss: 0.004474679735444841
          total_loss: 0.1752691260405949
          vf_explained_var: 0.9453265070915222
          vf_loss: 0.17642600092859495
    num_agent_steps_sampled: 625374
    num_agent_steps_trained: 625374
    num_steps_sampled: 625374
    num_steps_trained: 62537

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,313,8430.54,625374,7.8611,14.56,2.14,101.66


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 627372
  custom_metrics: {}
  date: 2021-11-09_06-06-16
  done: false
  episode_len_mean: 103.57
  episode_media: {}
  episode_reward_max: 14.560000000000015
  episode_reward_mean: 7.791300000000017
  episode_reward_min: 1.9200000000000135
  episodes_this_iter: 18
  episodes_total: 5972
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.2368049241247632
          entropy_coeff: 0.009999999999999998
          kl: 0.016238027077817182
          policy_loss: -0.027221323922276497
          total_loss: 0.34872675289710364
          vf_explained_var: 0.922298014163971
          vf_loss: 0.3698199959737914
    num_agent_steps_sampled: 627372
    num_agent_steps_trained: 627372
    num_steps_sampled: 627372
    num_steps_trained: 627372

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,314,8454.7,627372,7.7913,14.56,1.92,103.57


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 629370
  custom_metrics: {}
  date: 2021-11-09_06-06-41
  done: false
  episode_len_mean: 103.22
  episode_media: {}
  episode_reward_max: 14.560000000000015
  episode_reward_mean: 7.8888000000000185
  episode_reward_min: 1.9200000000000135
  episodes_this_iter: 20
  episodes_total: 5992
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.2347448865572612
          entropy_coeff: 0.009999999999999998
          kl: 0.007781012677058421
          policy_loss: -0.038177728227206637
          total_loss: 0.207445010596088
          vf_explained_var: 0.9378317594528198
          vf_loss: 0.24910712795598167
    num_agent_steps_sampled: 629370
    num_agent_steps_trained: 629370
    num_steps_sampled: 629370
    num_steps_trained: 62937

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,315,8480.2,629370,7.8888,14.56,1.92,103.22


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 631368
  custom_metrics: {}
  date: 2021-11-09_06-07-07
  done: false
  episode_len_mean: 103.13
  episode_media: {}
  episode_reward_max: 14.510000000000016
  episode_reward_mean: 7.673000000000019
  episode_reward_min: 1.9200000000000135
  episodes_this_iter: 20
  episodes_total: 6012
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.2657837510108947
          entropy_coeff: 0.009999999999999998
          kl: 0.009025867258374613
          policy_loss: -0.03296610970227491
          total_loss: 0.16502917748654172
          vf_explained_var: 0.9621362686157227
          vf_loss: 0.2003720967187768
    num_agent_steps_sampled: 631368
    num_agent_steps_trained: 631368
    num_steps_sampled: 631368
    num_steps_trained: 631368

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,316,8505.25,631368,7.673,14.51,1.92,103.13


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 633366
  custom_metrics: {}
  date: 2021-11-09_06-07-32
  done: false
  episode_len_mean: 102.87
  episode_media: {}
  episode_reward_max: 14.510000000000016
  episode_reward_mean: 7.593700000000018
  episode_reward_min: 1.9200000000000135
  episodes_this_iter: 19
  episodes_total: 6031
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.4326064387957256
          entropy_coeff: 0.009999999999999998
          kl: 0.010196276658626545
          policy_loss: -0.001278564085563024
          total_loss: 0.09863783796096132
          vf_explained_var: 0.9837288856506348
          vf_loss: 0.10262826727259727
    num_agent_steps_sampled: 633366
    num_agent_steps_trained: 633366
    num_steps_sampled: 633366
    num_steps_trained: 6333

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,317,8530.4,633366,7.5937,14.51,1.92,102.87




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 635364
  custom_metrics: {}
  date: 2021-11-09_06-08-12
  done: false
  episode_len_mean: 103.15
  episode_media: {}
  episode_reward_max: 14.510000000000016
  episode_reward_mean: 7.692200000000018
  episode_reward_min: -0.02
  episodes_this_iter: 20
  episodes_total: 6051
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.3352373265084767
          entropy_coeff: 0.009999999999999998
          kl: 0.005868270707514852
          policy_loss: -0.029777866424549194
          total_loss: 0.14744337428183782
          vf_explained_var: 0.9707704186439514
          vf_loss: 0.18388928433968907
    num_agent_steps_sampled: 635364
    num_agent_steps_trained: 635364
    num_steps_sampled: 635364
    num_steps_trained: 635364
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,318,8570.42,635364,7.6922,14.51,-0.02,103.15




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 637362
  custom_metrics: {}
  date: 2021-11-09_06-08-53
  done: false
  episode_len_mean: 99.72
  episode_media: {}
  episode_reward_max: 14.500000000000016
  episode_reward_mean: 7.804200000000019
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 22
  episodes_total: 6073
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.2549826301279523
          entropy_coeff: 0.009999999999999998
          kl: 0.008143174336559528
          policy_loss: 0.0024234875060972715
          total_loss: 0.2853721315662066
          vf_explained_var: 0.9639819860458374
          vf_loss: 0.28622288292362574
    num_agent_steps_sampled: 637362
    num_agent_steps_trained: 637362
    num_steps_sampled: 637362
    num_steps_trained: 637

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,319,8611.83,637362,7.8042,14.5,-0.06,99.72


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 639360
  custom_metrics: {}
  date: 2021-11-09_06-09-20
  done: false
  episode_len_mean: 98.91
  episode_media: {}
  episode_reward_max: 14.500000000000016
  episode_reward_mean: 7.5996000000000175
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 6093
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.3610617461658658
          entropy_coeff: 0.009999999999999998
          kl: 0.005413807772278378
          policy_loss: 0.0006613577760401226
          total_loss: 0.08950198745975892
          vf_explained_var: 0.9809530973434448
          vf_loss: 0.09628458220866465
    num_agent_steps_sampled: 639360
    num_agent_steps_trained: 639360
    num_steps_sampled: 639360
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,320,8638.82,639360,7.5996,14.5,-0.06,98.91


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 641358
  custom_metrics: {}
  date: 2021-11-09_06-09-46
  done: false
  episode_len_mean: 98.45
  episode_media: {}
  episode_reward_max: 14.500000000000016
  episode_reward_mean: 7.586300000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 6114
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.3224298783711024
          entropy_coeff: 0.009999999999999998
          kl: 0.004952700170160624
          policy_loss: -0.09212660439135063
          total_loss: 0.04889947387079398
          vf_explained_var: 0.9721863269805908
          vf_loss: 0.1486089398463567
    num_agent_steps_sampled: 641358
    num_agent_steps_trained: 641358
    num_steps_sampled: 641358
    num_steps_trained: 6413

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,321,8664.63,641358,7.5863,14.5,-0.06,98.45


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 643356
  custom_metrics: {}
  date: 2021-11-09_06-10-12
  done: false
  episode_len_mean: 97.15
  episode_media: {}
  episode_reward_max: 14.500000000000016
  episode_reward_mean: 7.643600000000018
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 6133
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.3385417904172625
          entropy_coeff: 0.009999999999999998
          kl: 0.008653353120958399
          policy_loss: -0.07999962340330793
          total_loss: 0.019447026953899434
          vf_explained_var: 0.9820808172225952
          vf_loss: 0.10790371565769116
    num_agent_steps_sampled: 643356
    num_agent_steps_trained: 643356
    num_steps_sampled: 643356
    num_steps_trained: 64

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,322,8690.2,643356,7.6436,14.5,-0.06,97.15


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 645354
  custom_metrics: {}
  date: 2021-11-09_06-10-36
  done: false
  episode_len_mean: 98.54
  episode_media: {}
  episode_reward_max: 14.600000000000016
  episode_reward_mean: 7.725700000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 6154
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.3415324733370826
          entropy_coeff: 0.009999999999999998
          kl: 0.013769056187539545
          policy_loss: -0.02720396516046354
          total_loss: 0.22429359061199994
          vf_explained_var: 0.9631147384643555
          vf_loss: 0.25707097065945467
    num_agent_steps_sampled: 645354
    num_agent_steps_trained: 645354
    num_steps_sampled: 645354
    num_steps_trained: 645

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,323,8714.65,645354,7.7257,14.6,-0.06,98.54


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 647352
  custom_metrics: {}
  date: 2021-11-09_06-11-02
  done: false
  episode_len_mean: 99.77
  episode_media: {}
  episode_reward_max: 14.600000000000016
  episode_reward_mean: 7.700700000000017
  episode_reward_min: 2.730000000000017
  episodes_this_iter: 19
  episodes_total: 6173
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.3631918804986136
          entropy_coeff: 0.009999999999999998
          kl: 0.01143959940595484
          policy_loss: -0.045070055801244006
          total_loss: 0.15435975872068888
          vf_explained_var: 0.9603191614151001
          vf_loss: 0.2065465254088243
    num_agent_steps_sampled: 647352
    num_agent_steps_trained: 647352
    num_steps_sampled: 647352
    num_steps_trained: 647352
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,324,8740.24,647352,7.7007,14.6,2.73,99.77


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 649350
  custom_metrics: {}
  date: 2021-11-09_06-11-27
  done: false
  episode_len_mean: 99.65
  episode_media: {}
  episode_reward_max: 14.600000000000016
  episode_reward_mean: 8.005800000000018
  episode_reward_min: 2.730000000000017
  episodes_this_iter: 20
  episodes_total: 6193
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.3936658484595164
          entropy_coeff: 0.009999999999999998
          kl: 0.007582191297501996
          policy_loss: -0.10740059362280936
          total_loss: -0.03816273029716242
          vf_explained_var: 0.9791226387023926
          vf_loss: 0.07885622497470605
    num_agent_steps_sampled: 649350
    num_agent_steps_trained: 649350
    num_steps_sampled: 649350
    num_steps_trained: 649350

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,325,8765.08,649350,8.0058,14.6,2.73,99.65


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 651348
  custom_metrics: {}
  date: 2021-11-09_06-11-53
  done: false
  episode_len_mean: 98.77
  episode_media: {}
  episode_reward_max: 14.600000000000016
  episode_reward_mean: 7.953700000000018
  episode_reward_min: 2.730000000000017
  episodes_this_iter: 21
  episodes_total: 6214
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.142639585619881
          entropy_coeff: 0.009999999999999998
          kl: 0.015675833456691993
          policy_loss: -0.024350583517835252
          total_loss: 0.21015512840378853
          vf_explained_var: 0.9639255404472351
          vf_loss: 0.2370042291070734
    num_agent_steps_sampled: 651348
    num_agent_steps_trained: 651348
    num_steps_sampled: 651348
    num_steps_trained: 651348
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,326,8791.41,651348,7.9537,14.6,2.73,98.77


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 653346
  custom_metrics: {}
  date: 2021-11-09_06-12-19
  done: false
  episode_len_mean: 98.51
  episode_media: {}
  episode_reward_max: 14.600000000000016
  episode_reward_mean: 7.843700000000018
  episode_reward_min: 2.730000000000017
  episodes_this_iter: 21
  episodes_total: 6235
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.348704231353033
          entropy_coeff: 0.009999999999999998
          kl: 0.014119688553355624
          policy_loss: 0.030383043328211422
          total_loss: 0.3220374312384852
          vf_explained_var: 0.9455980658531189
          vf_loss: 0.2970998193359091
    num_agent_steps_sampled: 653346
    num_agent_steps_trained: 653346
    num_steps_sampled: 653346
    num_steps_trained: 653346
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,327,8817.49,653346,7.8437,14.6,2.73,98.51


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 655344
  custom_metrics: {}
  date: 2021-11-09_06-12-44
  done: false
  episode_len_mean: 98.48
  episode_media: {}
  episode_reward_max: 14.540000000000015
  episode_reward_mean: 7.743700000000017
  episode_reward_min: 2.7300000000000133
  episodes_this_iter: 20
  episodes_total: 6255
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.3869074588730221
          entropy_coeff: 0.009999999999999998
          kl: 0.01067370691544567
          policy_loss: -0.04809710421555099
          total_loss: 0.12638575141983374
          vf_explained_var: 0.9640377163887024
          vf_loss: 0.1822729203850031
    num_agent_steps_sampled: 655344
    num_agent_steps_trained: 655344
    num_steps_sampled: 655344
    num_steps_trained: 655344
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,328,8842.75,655344,7.7437,14.54,2.73,98.48


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 657342
  custom_metrics: {}
  date: 2021-11-09_06-13-12
  done: false
  episode_len_mean: 98.6
  episode_media: {}
  episode_reward_max: 14.510000000000014
  episode_reward_mean: 7.616800000000017
  episode_reward_min: 2.6300000000000114
  episodes_this_iter: 20
  episodes_total: 6275
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.346135629074914
          entropy_coeff: 0.009999999999999998
          kl: 0.011249463457396709
          policy_loss: -0.060158145693796024
          total_loss: 0.12156166911479972
          vf_explained_var: 0.9698509573936462
          vf_loss: 0.18877424938338144
    num_agent_steps_sampled: 657342
    num_agent_steps_trained: 657342
    num_steps_sampled: 657342
    num_steps_trained: 657342


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,329,8869.86,657342,7.6168,14.51,2.63,98.6


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 659340
  custom_metrics: {}
  date: 2021-11-09_06-13-39
  done: false
  episode_len_mean: 97.78
  episode_media: {}
  episode_reward_max: 14.460000000000017
  episode_reward_mean: 7.712700000000016
  episode_reward_min: 2.6300000000000114
  episodes_this_iter: 20
  episodes_total: 6295
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2800861568677993
          entropy_coeff: 0.009999999999999998
          kl: 0.009757849133289594
          policy_loss: -0.042126618698239325
          total_loss: 0.06994981784373522
          vf_explained_var: 0.9764447808265686
          vf_loss: 0.11931989901654777
    num_agent_steps_sampled: 659340
    num_agent_steps_trained: 659340
    num_steps_sampled: 659340
    num_steps_trained: 65934

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,330,8897.33,659340,7.7127,14.46,2.63,97.78


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 661338
  custom_metrics: {}
  date: 2021-11-09_06-14-06
  done: false
  episode_len_mean: 97.5
  episode_media: {}
  episode_reward_max: 14.520000000000016
  episode_reward_mean: 7.880500000000017
  episode_reward_min: 2.6300000000000114
  episodes_this_iter: 22
  episodes_total: 6317
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2520311798368182
          entropy_coeff: 0.009999999999999998
          kl: 0.01736684528752612
          policy_loss: 0.003613087676820301
          total_loss: 0.47367068093624853
          vf_explained_var: 0.9368197321891785
          vf_loss: 0.47268694206362677
    num_agent_steps_sampled: 661338
    num_agent_steps_trained: 661338
    num_steps_sampled: 661338
    num_steps_trained: 661338
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,331,8924.49,661338,7.8805,14.52,2.63,97.5


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 663336
  custom_metrics: {}
  date: 2021-11-09_06-14-33
  done: false
  episode_len_mean: 96.92
  episode_media: {}
  episode_reward_max: 14.540000000000015
  episode_reward_mean: 8.322700000000017
  episode_reward_min: 2.6300000000000114
  episodes_this_iter: 21
  episodes_total: 6338
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2939374512150175
          entropy_coeff: 0.009999999999999998
          kl: 0.013703074350247186
          policy_loss: -0.020903648586855048
          total_loss: 0.17679476927788484
          vf_explained_var: 0.9675490856170654
          vf_loss: 0.20283346481266476
    num_agent_steps_sampled: 663336
    num_agent_steps_trained: 663336
    num_steps_sampled: 663336
    num_steps_trained: 66333

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,332,8951.25,663336,8.3227,14.54,2.63,96.92


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 665334
  custom_metrics: {}
  date: 2021-11-09_06-14-59
  done: false
  episode_len_mean: 95.87
  episode_media: {}
  episode_reward_max: 14.540000000000015
  episode_reward_mean: 8.360300000000018
  episode_reward_min: 1.0099999999999991
  episodes_this_iter: 20
  episodes_total: 6358
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.3070197990962438
          entropy_coeff: 0.009999999999999998
          kl: 0.020040837735135732
          policy_loss: 0.005487218960410073
          total_loss: 0.29247251625749326
          vf_explained_var: 0.9591438174247742
          vf_loss: 0.2886416089499281
    num_agent_steps_sampled: 665334
    num_agent_steps_trained: 665334
    num_steps_sampled: 665334
    num_steps_trained: 665334


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,333,8977.25,665334,8.3603,14.54,1.01,95.87


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 667332
  custom_metrics: {}
  date: 2021-11-09_06-15-25
  done: false
  episode_len_mean: 96.72
  episode_media: {}
  episode_reward_max: 14.540000000000015
  episode_reward_mean: 8.66310000000002
  episode_reward_min: 1.0099999999999991
  episodes_this_iter: 20
  episodes_total: 6378
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2859246674038114
          entropy_coeff: 0.009999999999999998
          kl: 0.00832389184816152
          policy_loss: 0.03352791920659088
          total_loss: 0.2329613973963119
          vf_explained_var: 0.9737922549247742
          vf_loss: 0.2051816510125285
    num_agent_steps_sampled: 667332
    num_agent_steps_trained: 667332
    num_steps_sampled: 667332
    num_steps_trained: 667332
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,334,9002.82,667332,8.6631,14.54,1.01,96.72




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 669330
  custom_metrics: {}
  date: 2021-11-09_06-16-03
  done: false
  episode_len_mean: 96.13
  episode_media: {}
  episode_reward_max: 14.540000000000015
  episode_reward_mean: 8.74550000000002
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 6399
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.3163059626306806
          entropy_coeff: 0.009999999999999998
          kl: 0.008025077558790086
          policy_loss: -0.028611994880650724
          total_loss: 0.42639863810368944
          vf_explained_var: 0.9101011753082275
          vf_loss: 0.46131789185816335
    num_agent_steps_sampled: 669330
    num_agent_steps_trained: 669330
    num_steps_sampled: 669330
    num_steps_trained: 669

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,335,9040.85,669330,8.7455,14.54,-0.06,96.13


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 671328
  custom_metrics: {}
  date: 2021-11-09_06-16-30
  done: false
  episode_len_mean: 97.36
  episode_media: {}
  episode_reward_max: 14.540000000000015
  episode_reward_mean: 8.697000000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 6420
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.267643279688699
          entropy_coeff: 0.009999999999999998
          kl: 0.007584892236445302
          policy_loss: -0.054662521183490756
          total_loss: 0.1688628750365405
          vf_explained_var: 0.9721618890762329
          vf_loss: 0.22972208112478257
    num_agent_steps_sampled: 671328
    num_agent_steps_trained: 671328
    num_steps_sampled: 671328
    num_steps_trained: 6713

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,336,9067.71,671328,8.697,14.54,-0.06,97.36




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 673326
  custom_metrics: {}
  date: 2021-11-09_06-17-12
  done: false
  episode_len_mean: 97.12
  episode_media: {}
  episode_reward_max: 14.510000000000016
  episode_reward_mean: 8.398500000000018
  episode_reward_min: -1.400000000000001
  episodes_this_iter: 21
  episodes_total: 6441
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.4582096213386173
          entropy_coeff: 0.009999999999999998
          kl: 0.008916506841736845
          policy_loss: -0.038348451522844176
          total_loss: 0.4431554554473786
          vf_explained_var: 0.9052816033363342
          vf_loss: 0.4884686584273974
    num_agent_steps_sampled: 673326
    num_agent_steps_trained: 673326
    num_steps_sampled: 673326
    num_steps_trained: 673326


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,337,9110.12,673326,8.3985,14.51,-1.4,97.12


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 675324
  custom_metrics: {}
  date: 2021-11-09_06-17-39
  done: false
  episode_len_mean: 96.21
  episode_media: {}
  episode_reward_max: 14.510000000000016
  episode_reward_mean: 8.225200000000015
  episode_reward_min: -1.400000000000001
  episodes_this_iter: 21
  episodes_total: 6462
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.4992447484107245
          entropy_coeff: 0.009999999999999998
          kl: 0.009995201436470532
          policy_loss: -0.02190162954585893
          total_loss: 0.15992314967193774
          vf_explained_var: 0.9650133848190308
          vf_loss: 0.18827835616788693
    num_agent_steps_sampled: 675324
    num_agent_steps_trained: 675324
    num_steps_sampled: 675324
    num_steps_trained: 675324

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,338,9136.56,675324,8.2252,14.51,-1.4,96.21


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 677322
  custom_metrics: {}
  date: 2021-11-09_06-18-05
  done: false
  episode_len_mean: 95.56
  episode_media: {}
  episode_reward_max: 14.490000000000016
  episode_reward_mean: 8.031700000000017
  episode_reward_min: -1.400000000000001
  episodes_this_iter: 21
  episodes_total: 6483
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2876351946876163
          entropy_coeff: 0.009999999999999998
          kl: 0.008551069493795652
          policy_loss: -0.03761723641128767
          total_loss: 0.10026918985836562
          vf_explained_var: 0.978560745716095
          vf_loss: 0.14345762470648402
    num_agent_steps_sampled: 677322
    num_agent_steps_trained: 677322
    num_steps_sampled: 677322
    num_steps_trained: 677322


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,339,9162.95,677322,8.0317,14.49,-1.4,95.56


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 679320
  custom_metrics: {}
  date: 2021-11-09_06-18-30
  done: false
  episode_len_mean: 96.27
  episode_media: {}
  episode_reward_max: 14.490000000000016
  episode_reward_mean: 7.819400000000017
  episode_reward_min: -1.400000000000001
  episodes_this_iter: 20
  episodes_total: 6503
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.4567826662744794
          entropy_coeff: 0.009999999999999998
          kl: 0.009651758796773552
          policy_loss: -0.006705844473271143
          total_loss: 0.14574650143761012
          vf_explained_var: 0.9760978817939758
          vf_loss: 0.15877470155911785
    num_agent_steps_sampled: 679320
    num_agent_steps_trained: 679320
    num_steps_sampled: 679320
    num_steps_trained: 67932

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,340,9187.72,679320,7.8194,14.49,-1.4,96.27


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 681318
  custom_metrics: {}
  date: 2021-11-09_06-18-54
  done: false
  episode_len_mean: 97.03
  episode_media: {}
  episode_reward_max: 14.490000000000016
  episode_reward_mean: 7.8043000000000164
  episode_reward_min: -1.400000000000001
  episodes_this_iter: 19
  episodes_total: 6522
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.329045041402181
          entropy_coeff: 0.009999999999999998
          kl: 0.00938554731904157
          policy_loss: -0.011068250274374372
          total_loss: 0.23782867406982752
          vf_explained_var: 0.960331916809082
          vf_loss: 0.2541693310297671
    num_agent_steps_sampled: 681318
    num_agent_steps_trained: 681318
    num_steps_sampled: 681318
    num_steps_trained: 681318
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,341,9212.29,681318,7.8043,14.49,-1.4,97.03


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 683316
  custom_metrics: {}
  date: 2021-11-09_06-19-19
  done: false
  episode_len_mean: 98.0
  episode_media: {}
  episode_reward_max: 14.570000000000016
  episode_reward_mean: 8.110500000000018
  episode_reward_min: 2.1500000000000155
  episodes_this_iter: 19
  episodes_total: 6541
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.3142667134602866
          entropy_coeff: 0.009999999999999998
          kl: 0.009233032199972869
          policy_loss: -0.09156874189419406
          total_loss: 0.07713589748101575
          vf_explained_var: 0.9787451028823853
          vf_loss: 0.1739595557962145
    num_agent_steps_sampled: 683316
    num_agent_steps_trained: 683316
    num_steps_sampled: 683316
    num_steps_trained: 683316
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,342,9237.34,683316,8.1105,14.57,2.15,98


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 685314
  custom_metrics: {}
  date: 2021-11-09_06-19-45
  done: false
  episode_len_mean: 98.89
  episode_media: {}
  episode_reward_max: 14.570000000000016
  episode_reward_mean: 8.330300000000019
  episode_reward_min: 2.1500000000000155
  episodes_this_iter: 22
  episodes_total: 6563
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.354618962038131
          entropy_coeff: 0.009999999999999998
          kl: 0.008178868044154868
          policy_loss: 0.0027668381198531107
          total_loss: 0.15038413265276523
          vf_explained_var: 0.9754685759544373
          vf_loss: 0.15417630253803163
    num_agent_steps_sampled: 685314
    num_agent_steps_trained: 685314
    num_steps_sampled: 685314
    num_steps_trained: 685314

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,343,9263.02,685314,8.3303,14.57,2.15,98.89


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 687312
  custom_metrics: {}
  date: 2021-11-09_06-20-11
  done: false
  episode_len_mean: 99.21
  episode_media: {}
  episode_reward_max: 14.570000000000016
  episode_reward_mean: 8.169500000000017
  episode_reward_min: 2.1500000000000155
  episodes_this_iter: 20
  episodes_total: 6583
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.3509451650437854
          entropy_coeff: 0.009999999999999998
          kl: 0.008142356727243658
          policy_loss: -0.03412926202373845
          total_loss: 0.11646484450570174
          vf_explained_var: 0.9765812754631042
          vf_loss: 0.15714756842880023
    num_agent_steps_sampled: 687312
    num_agent_steps_trained: 687312
    num_steps_sampled: 687312
    num_steps_trained: 687312

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,344,9288.34,687312,8.1695,14.57,2.15,99.21


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 689310
  custom_metrics: {}
  date: 2021-11-09_06-20-35
  done: false
  episode_len_mean: 101.01
  episode_media: {}
  episode_reward_max: 14.570000000000016
  episode_reward_mean: 8.671000000000015
  episode_reward_min: 2.1500000000000155
  episodes_this_iter: 19
  episodes_total: 6602
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2994666207404364
          entropy_coeff: 0.009999999999999998
          kl: 0.011465647870236462
          policy_loss: -0.05158957028761506
          total_loss: 0.07864000403455325
          vf_explained_var: 0.9851568937301636
          vf_loss: 0.13342917466624862
    num_agent_steps_sampled: 689310
    num_agent_steps_trained: 689310
    num_steps_sampled: 689310
    num_steps_trained: 68931

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,345,9312.55,689310,8.671,14.57,2.15,101.01


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 691308
  custom_metrics: {}
  date: 2021-11-09_06-20-59
  done: false
  episode_len_mean: 101.77
  episode_media: {}
  episode_reward_max: 14.570000000000016
  episode_reward_mean: 8.453900000000019
  episode_reward_min: 2.500000000000015
  episodes_this_iter: 18
  episodes_total: 6620
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.4190960543496267
          entropy_coeff: 0.009999999999999998
          kl: 0.006798547805403946
          policy_loss: -0.04554038853162811
          total_loss: 0.052021701806890114
          vf_explained_var: 0.9827831387519836
          vf_loss: 0.1059450728552682
    num_agent_steps_sampled: 691308
    num_agent_steps_trained: 691308
    num_steps_sampled: 691308
    num_steps_trained: 691308

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,346,9336.56,691308,8.4539,14.57,2.5,101.77


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 693306
  custom_metrics: {}
  date: 2021-11-09_06-21-24
  done: false
  episode_len_mean: 101.33
  episode_media: {}
  episode_reward_max: 14.500000000000016
  episode_reward_mean: 8.266900000000017
  episode_reward_min: 2.500000000000015
  episodes_this_iter: 21
  episodes_total: 6641
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.4049781078384036
          entropy_coeff: 0.009999999999999998
          kl: 0.009915467807996134
          policy_loss: -0.04787975576307092
          total_loss: 0.03697026124046672
          vf_explained_var: 0.9799949526786804
          vf_loss: 0.09042904631545147
    num_agent_steps_sampled: 693306
    num_agent_steps_trained: 693306
    num_steps_sampled: 693306
    num_steps_trained: 693306

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,347,9361.8,693306,8.2669,14.5,2.5,101.33


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 695304
  custom_metrics: {}
  date: 2021-11-09_06-21-50
  done: false
  episode_len_mean: 101.95
  episode_media: {}
  episode_reward_max: 14.500000000000016
  episode_reward_mean: 8.556700000000017
  episode_reward_min: 2.870000000000022
  episodes_this_iter: 20
  episodes_total: 6661
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.3062041467144376
          entropy_coeff: 0.009999999999999998
          kl: 0.009532601016797903
          policy_loss: -0.026228107201556366
          total_loss: 0.26253174540719815
          vf_explained_var: 0.9521899819374084
          vf_loss: 0.2936782259848856
    num_agent_steps_sampled: 695304
    num_agent_steps_trained: 695304
    num_steps_sampled: 695304
    num_steps_trained: 695304

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,348,9387.68,695304,8.5567,14.5,2.87,101.95


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 697302
  custom_metrics: {}
  date: 2021-11-09_06-22-15
  done: false
  episode_len_mean: 102.93
  episode_media: {}
  episode_reward_max: 14.500000000000016
  episode_reward_mean: 8.534900000000016
  episode_reward_min: 2.870000000000022
  episodes_this_iter: 18
  episodes_total: 6679
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.4574476117179507
          entropy_coeff: 0.009999999999999998
          kl: 0.010030906100699882
          policy_loss: -0.05369256685177485
          total_loss: 0.1406198683638303
          vf_explained_var: 0.9678728580474854
          vf_loss: 0.20031754042775857
    num_agent_steps_sampled: 697302
    num_agent_steps_trained: 697302
    num_steps_sampled: 697302
    num_steps_trained: 697302


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,349,9412.44,697302,8.5349,14.5,2.87,102.93


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 699300
  custom_metrics: {}
  date: 2021-11-09_06-22-40
  done: false
  episode_len_mean: 102.08
  episode_media: {}
  episode_reward_max: 14.500000000000016
  episode_reward_mean: 8.074800000000018
  episode_reward_min: 2.460000000000018
  episodes_this_iter: 21
  episodes_total: 6700
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.32734597495624
          entropy_coeff: 0.009999999999999998
          kl: 0.013927905287505772
          policy_loss: -0.019712889411797125
          total_loss: 0.1565983062476984
          vf_explained_var: 0.9604735374450684
          vf_loss: 0.1776860883725541
    num_agent_steps_sampled: 699300
    num_agent_steps_trained: 699300
    num_steps_sampled: 699300
    num_steps_trained: 699300
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,350,9437.29,699300,8.0748,14.5,2.46,102.08


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 701298
  custom_metrics: {}
  date: 2021-11-09_06-23-06
  done: false
  episode_len_mean: 100.39
  episode_media: {}
  episode_reward_max: 14.500000000000016
  episode_reward_mean: 8.055200000000017
  episode_reward_min: 2.460000000000018
  episodes_this_iter: 19
  episodes_total: 6719
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.337004580384209
          entropy_coeff: 0.009999999999999998
          kl: 0.010735919646094992
          policy_loss: -0.04699248554451125
          total_loss: 0.2308222077432133
          vf_explained_var: 0.9638420939445496
          vf_loss: 0.28201307594066577
    num_agent_steps_sampled: 701298
    num_agent_steps_trained: 701298
    num_steps_sampled: 701298
    num_steps_trained: 701298
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,351,9464.05,701298,8.0552,14.5,2.46,100.39


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 703296
  custom_metrics: {}
  date: 2021-11-09_06-23-32
  done: false
  episode_len_mean: 101.32
  episode_media: {}
  episode_reward_max: 14.500000000000016
  episode_reward_mean: 8.269300000000017
  episode_reward_min: 2.460000000000018
  episodes_this_iter: 20
  episodes_total: 6739
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.346977965037028
          entropy_coeff: 0.009999999999999998
          kl: 0.010482162533732839
          policy_loss: -0.03136878064168351
          total_loss: 0.17050512651247637
          vf_explained_var: 0.9763215780258179
          vf_loss: 0.2063888054163683
    num_agent_steps_sampled: 703296
    num_agent_steps_trained: 703296
    num_steps_sampled: 703296
    num_steps_trained: 703296
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,352,9489.22,703296,8.2693,14.5,2.46,101.32




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 705294
  custom_metrics: {}
  date: 2021-11-09_06-24-15
  done: false
  episode_len_mean: 99.8
  episode_media: {}
  episode_reward_max: 14.500000000000016
  episode_reward_mean: 8.156000000000018
  episode_reward_min: -0.05
  episodes_this_iter: 21
  episodes_total: 6760
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2697730694498335
          entropy_coeff: 0.009999999999999998
          kl: 0.009346902147312002
          policy_loss: -0.0017446626864728474
          total_loss: 0.24181658284117777
          vf_explained_var: 0.9631671905517578
          vf_loss: 0.24827394473056
    num_agent_steps_sampled: 705294
    num_agent_steps_trained: 705294
    num_steps_sampled: 705294
    num_steps_trained: 705294
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,353,9532.77,705294,8.156,14.5,-0.05,99.8




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 707292
  custom_metrics: {}
  date: 2021-11-09_06-24-58
  done: false
  episode_len_mean: 98.97
  episode_media: {}
  episode_reward_max: 14.440000000000015
  episode_reward_mean: 8.250100000000018
  episode_reward_min: -0.05
  episodes_this_iter: 21
  episodes_total: 6781
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.3371250163941157
          entropy_coeff: 0.009999999999999998
          kl: 0.01784258970269047
          policy_loss: -0.03203862416779711
          total_loss: 0.21281654173951772
          vf_explained_var: 0.9655543565750122
          vf_loss: 0.24298355091540585
    num_agent_steps_sampled: 707292
    num_agent_steps_trained: 707292
    num_steps_sampled: 707292
    num_steps_trained: 707292
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,354,9575.32,707292,8.2501,14.44,-0.05,98.97




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 709290
  custom_metrics: {}
  date: 2021-11-09_06-25-39
  done: false
  episode_len_mean: 97.3
  episode_media: {}
  episode_reward_max: 14.440000000000015
  episode_reward_mean: 8.341400000000016
  episode_reward_min: -0.05
  episodes_this_iter: 21
  episodes_total: 6802
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.3619236162730626
          entropy_coeff: 0.009999999999999998
          kl: 0.008310188970941147
          policy_loss: -0.005224102673431237
          total_loss: 0.18890051015076184
          vf_explained_var: 0.9629426002502441
          vf_loss: 0.20064447798899243
    num_agent_steps_sampled: 709290
    num_agent_steps_trained: 709290
    num_steps_sampled: 709290
    num_steps_trained: 709290
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,355,9616,709290,8.3414,14.44,-0.05,97.3


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 711288
  custom_metrics: {}
  date: 2021-11-09_06-26-04
  done: false
  episode_len_mean: 97.94
  episode_media: {}
  episode_reward_max: 14.440000000000015
  episode_reward_mean: 8.316600000000017
  episode_reward_min: -0.05
  episodes_this_iter: 20
  episodes_total: 6822
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.3677930034342267
          entropy_coeff: 0.009999999999999998
          kl: 0.009286569215419394
          policy_loss: -0.028524173476866314
          total_loss: 0.12277876654905932
          vf_explained_var: 0.9665505290031433
          vf_loss: 0.15704738458707218
    num_agent_steps_sampled: 711288
    num_agent_steps_trained: 711288
    num_steps_sampled: 711288
    num_steps_trained: 711288
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,356,9641.4,711288,8.3166,14.44,-0.05,97.94


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 713286
  custom_metrics: {}
  date: 2021-11-09_06-26-30
  done: false
  episode_len_mean: 96.87
  episode_media: {}
  episode_reward_max: 14.520000000000016
  episode_reward_mean: 8.066200000000016
  episode_reward_min: -0.05
  episodes_this_iter: 20
  episodes_total: 6842
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2359523426918757
          entropy_coeff: 0.009999999999999998
          kl: 0.007878049486402463
          policy_loss: -0.00923981439499628
          total_loss: 0.1809174850228287
          vf_explained_var: 0.9630292057991028
          vf_loss: 0.1957866301848775
    num_agent_steps_sampled: 713286
    num_agent_steps_trained: 713286
    num_steps_sampled: 713286
    num_steps_trained: 713286
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,357,9667.22,713286,8.0662,14.52,-0.05,96.87


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 715284
  custom_metrics: {}
  date: 2021-11-09_06-26-56
  done: false
  episode_len_mean: 98.53
  episode_media: {}
  episode_reward_max: 14.520000000000016
  episode_reward_mean: 8.090100000000017
  episode_reward_min: -0.04
  episodes_this_iter: 20
  episodes_total: 6862
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.3860928938502357
          entropy_coeff: 0.009999999999999998
          kl: 0.011287990489453158
          policy_loss: -0.03670581402700572
          total_loss: 0.14409142802691177
          vf_explained_var: 0.9641470909118652
          vf_loss: 0.18501487372531777
    num_agent_steps_sampled: 715284
    num_agent_steps_trained: 715284
    num_steps_sampled: 715284
    num_steps_trained: 715284
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,358,9692.88,715284,8.0901,14.52,-0.04,98.53


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 717282
  custom_metrics: {}
  date: 2021-11-09_06-27-21
  done: false
  episode_len_mean: 99.11
  episode_media: {}
  episode_reward_max: 14.520000000000016
  episode_reward_mean: 8.278200000000018
  episode_reward_min: -0.04
  episodes_this_iter: 20
  episodes_total: 6882
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.328961078609739
          entropy_coeff: 0.009999999999999998
          kl: 0.011027151366115937
          policy_loss: -0.05499471787895475
          total_loss: 0.17742119946827492
          vf_explained_var: 0.9680435061454773
          vf_loss: 0.23628506447587694
    num_agent_steps_sampled: 717282
    num_agent_steps_trained: 717282
    num_steps_sampled: 717282
    num_steps_trained: 717282
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,359,9718.54,717282,8.2782,14.52,-0.04,99.11


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 719280
  custom_metrics: {}
  date: 2021-11-09_06-27-47
  done: false
  episode_len_mean: 100.01
  episode_media: {}
  episode_reward_max: 14.520000000000016
  episode_reward_mean: 8.32690000000002
  episode_reward_min: 3.180000000000012
  episodes_this_iter: 21
  episodes_total: 6903
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.291761006627764
          entropy_coeff: 0.009999999999999998
          kl: 0.008530267990291968
          policy_loss: -0.03657809683964366
          total_loss: 0.12853096462786198
          vf_explained_var: 0.9688601493835449
          vf_loss: 0.17073928830879076
    num_agent_steps_sampled: 719280
    num_agent_steps_trained: 719280
    num_steps_sampled: 719280
    num_steps_trained: 719280
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,360,9744.75,719280,8.3269,14.52,3.18,100.01


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 721278
  custom_metrics: {}
  date: 2021-11-09_06-28-13
  done: false
  episode_len_mean: 99.3
  episode_media: {}
  episode_reward_max: 14.520000000000016
  episode_reward_mean: 8.152400000000018
  episode_reward_min: 2.9300000000000126
  episodes_this_iter: 20
  episodes_total: 6923
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2901059462910607
          entropy_coeff: 0.009999999999999998
          kl: 0.01479557778523747
          policy_loss: -0.08897989401150318
          total_loss: 0.17826444576716138
          vf_explained_var: 0.948409914970398
          vf_loss: 0.2675055825994128
    num_agent_steps_sampled: 721278
    num_agent_steps_trained: 721278
    num_steps_sampled: 721278
    num_steps_trained: 721278
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,361,9769.97,721278,8.1524,14.52,2.93,99.3


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 723276
  custom_metrics: {}
  date: 2021-11-09_06-28-38
  done: false
  episode_len_mean: 99.8
  episode_media: {}
  episode_reward_max: 14.510000000000016
  episode_reward_mean: 8.20260000000002
  episode_reward_min: 2.9300000000000126
  episodes_this_iter: 20
  episodes_total: 6943
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2371899071193877
          entropy_coeff: 0.009999999999999998
          kl: 0.010208327613809774
          policy_loss: -0.017023710760154895
          total_loss: 0.14966552932524965
          vf_explained_var: 0.967486560344696
          vf_loss: 0.17034019602551348
    num_agent_steps_sampled: 723276
    num_agent_steps_trained: 723276
    num_steps_sampled: 723276
    num_steps_trained: 723276
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,362,9795.37,723276,8.2026,14.51,2.93,99.8


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 725274
  custom_metrics: {}
  date: 2021-11-09_06-29-03
  done: false
  episode_len_mean: 100.11
  episode_media: {}
  episode_reward_max: 14.510000000000016
  episode_reward_mean: 8.53080000000002
  episode_reward_min: 2.9300000000000126
  episodes_this_iter: 19
  episodes_total: 6962
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2974622039567856
          entropy_coeff: 0.009999999999999998
          kl: 0.006823510482732974
          policy_loss: -0.08477870128339245
          total_loss: 0.020725552320835137
          vf_explained_var: 0.980401337146759
          vf_loss: 0.11264957068931489
    num_agent_steps_sampled: 725274
    num_agent_steps_trained: 725274
    num_steps_sampled: 725274
    num_steps_trained: 725274

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,363,9819.83,725274,8.5308,14.51,2.93,100.11


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 727272
  custom_metrics: {}
  date: 2021-11-09_06-29-29
  done: false
  episode_len_mean: 98.87
  episode_media: {}
  episode_reward_max: 14.520000000000016
  episode_reward_mean: 8.267500000000018
  episode_reward_min: 2.9300000000000126
  episodes_this_iter: 21
  episodes_total: 6983
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2643950519107636
          entropy_coeff: 0.009999999999999998
          kl: 0.010030350485640674
          policy_loss: -0.018617000413082896
          total_loss: 0.24203304335297574
          vf_explained_var: 0.9600182175636292
          vf_loss: 0.26472509636410646
    num_agent_steps_sampled: 727272
    num_agent_steps_trained: 727272
    num_steps_sampled: 727272
    num_steps_trained: 72727

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,364,9846.1,727272,8.2675,14.52,2.93,98.87


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 729270
  custom_metrics: {}
  date: 2021-11-09_06-29-55
  done: false
  episode_len_mean: 98.68
  episode_media: {}
  episode_reward_max: 14.520000000000016
  episode_reward_mean: 8.553000000000019
  episode_reward_min: 2.220000000000018
  episodes_this_iter: 20
  episodes_total: 7003
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2952426933106922
          entropy_coeff: 0.009999999999999998
          kl: 0.010176033108699207
          policy_loss: -0.043855467438697816
          total_loss: 0.3257447286730721
          vf_explained_var: 0.9450478553771973
          vf_loss: 0.37385926966865857
    num_agent_steps_sampled: 729270
    num_agent_steps_trained: 729270
    num_steps_sampled: 729270
    num_steps_trained: 729270


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,365,9871.72,729270,8.553,14.52,2.22,98.68


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 731268
  custom_metrics: {}
  date: 2021-11-09_06-30-21
  done: false
  episode_len_mean: 97.66
  episode_media: {}
  episode_reward_max: 14.520000000000016
  episode_reward_mean: 8.581600000000016
  episode_reward_min: 2.220000000000018
  episodes_this_iter: 23
  episodes_total: 7026
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.3253761745634534
          entropy_coeff: 0.009999999999999998
          kl: 0.013130620354748011
          policy_loss: 0.004039045955453601
          total_loss: 0.1399556239623399
          vf_explained_var: 0.9790974259376526
          vf_loss: 0.13795289239801822
    num_agent_steps_sampled: 731268
    num_agent_steps_trained: 731268
    num_steps_sampled: 731268
    num_steps_trained: 731268
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,366,9897.84,731268,8.5816,14.52,2.22,97.66


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 733266
  custom_metrics: {}
  date: 2021-11-09_06-30-46
  done: false
  episode_len_mean: 97.37
  episode_media: {}
  episode_reward_max: 14.520000000000016
  episode_reward_mean: 8.971200000000017
  episode_reward_min: 2.220000000000018
  episodes_this_iter: 19
  episodes_total: 7045
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2074101709184193
          entropy_coeff: 0.009999999999999998
          kl: 0.009544988508930013
          policy_loss: 0.015125659019464539
          total_loss: 0.4125593081604512
          vf_explained_var: 0.9650599956512451
          vf_loss: 0.4013534915411756
    num_agent_steps_sampled: 733266
    num_agent_steps_trained: 733266
    num_steps_sampled: 733266
    num_steps_trained: 733266
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,367,9923.19,733266,8.9712,14.52,2.22,97.37


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 735264
  custom_metrics: {}
  date: 2021-11-09_06-31-12
  done: false
  episode_len_mean: 96.9
  episode_media: {}
  episode_reward_max: 14.520000000000016
  episode_reward_mean: 8.672000000000018
  episode_reward_min: 0.9499999999999991
  episodes_this_iter: 21
  episodes_total: 7066
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.1629855544794174
          entropy_coeff: 0.009999999999999998
          kl: 0.009398041416845944
          policy_loss: 0.0019703096044915063
          total_loss: 0.23039308423619895
          vf_explained_var: 0.9616162776947021
          vf_loss: 0.23202391367937839
    num_agent_steps_sampled: 735264
    num_agent_steps_trained: 735264
    num_steps_sampled: 735264
    num_steps_trained: 735264

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,368,9948.62,735264,8.672,14.52,0.95,96.9


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 737262
  custom_metrics: {}
  date: 2021-11-09_06-31-38
  done: false
  episode_len_mean: 97.14
  episode_media: {}
  episode_reward_max: 14.520000000000016
  episode_reward_mean: 8.526600000000016
  episode_reward_min: 0.9499999999999991
  episodes_this_iter: 20
  episodes_total: 7086
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.1902918023722513
          entropy_coeff: 0.009999999999999998
          kl: 0.007772684363107012
          policy_loss: -0.026767023298002426
          total_loss: 0.1372812455276116
          vf_explained_var: 0.9693616628646851
          vf_loss: 0.16931100714774358
    num_agent_steps_sampled: 737262
    num_agent_steps_trained: 737262
    num_steps_sampled: 737262
    num_steps_trained: 737262

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,369,9974.79,737262,8.5266,14.52,0.95,97.14




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 739260
  custom_metrics: {}
  date: 2021-11-09_06-32-20
  done: false
  episode_len_mean: 96.48
  episode_media: {}
  episode_reward_max: 14.520000000000016
  episode_reward_mean: 8.352400000000017
  episode_reward_min: -0.03
  episodes_this_iter: 21
  episodes_total: 7107
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2993781401997522
          entropy_coeff: 0.009999999999999998
          kl: 0.00752958114513356
          policy_loss: -0.02869471942207643
          total_loss: 0.10600502146851448
          vf_explained_var: 0.9753931164741516
          vf_loss: 0.14126102392162596
    num_agent_steps_sampled: 739260
    num_agent_steps_trained: 739260
    num_steps_sampled: 739260
    num_steps_trained: 739260
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,370,10017.4,739260,8.3524,14.52,-0.03,96.48


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 741258
  custom_metrics: {}
  date: 2021-11-09_06-32-47
  done: false
  episode_len_mean: 97.26
  episode_media: {}
  episode_reward_max: 14.530000000000017
  episode_reward_mean: 8.313100000000016
  episode_reward_min: -0.03
  episodes_this_iter: 21
  episodes_total: 7128
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.3266222766467504
          entropy_coeff: 0.009999999999999998
          kl: 0.007844395427312481
          policy_loss: -0.020123177110439254
          total_loss: 0.0974256938412076
          vf_explained_var: 0.9860268831253052
          vf_loss: 0.12411364994588353
    num_agent_steps_sampled: 741258
    num_agent_steps_trained: 741258
    num_steps_sampled: 741258
    num_steps_trained: 741258
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,371,10044.4,741258,8.3131,14.53,-0.03,97.26




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 743256
  custom_metrics: {}
  date: 2021-11-09_06-33-33
  done: false
  episode_len_mean: 93.71
  episode_media: {}
  episode_reward_max: 14.670000000000014
  episode_reward_mean: 8.428600000000015
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 23
  episodes_total: 7151
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2411462244533358
          entropy_coeff: 0.009999999999999998
          kl: 0.0061553175053889065
          policy_loss: -0.008885186218789646
          total_loss: 0.4369311896263666
          vf_explained_var: 0.9654485583305359
          vf_loss: 0.4529693682367603
    num_agent_steps_sampled: 743256
    num_agent_steps_trained: 743256
    num_steps_sampled: 743256
    num_steps_trained: 743

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,372,10089.6,743256,8.4286,14.67,-0.06,93.71


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 745254
  custom_metrics: {}
  date: 2021-11-09_06-33-59
  done: false
  episode_len_mean: 93.22
  episode_media: {}
  episode_reward_max: 14.670000000000014
  episode_reward_mean: 8.453300000000016
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 7172
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8542968749999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2583233958198912
          entropy_coeff: 0.009999999999999998
          kl: 0.004938840548784555
          policy_loss: -0.11705142227666719
          total_loss: -0.02958332992025784
          vf_explained_var: 0.9874986410140991
          vf_loss: 0.09583209030152787
    num_agent_steps_sampled: 745254
    num_agent_steps_trained: 745254
    num_steps_sampled: 745254
    num_steps_trained: 74

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,373,10116.1,745254,8.4533,14.67,-0.06,93.22


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 747252
  custom_metrics: {}
  date: 2021-11-09_06-34-26
  done: false
  episode_len_mean: 92.38
  episode_media: {}
  episode_reward_max: 14.670000000000014
  episode_reward_mean: 8.829700000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 22
  episodes_total: 7194
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484374999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.2108529601778304
          entropy_coeff: 0.009999999999999998
          kl: 0.01017530420786566
          policy_loss: -0.012853812665811606
          total_loss: 0.06091678997590428
          vf_explained_var: 0.9900934100151062
          vf_loss: 0.08153276729974009
    num_agent_steps_sampled: 747252
    num_agent_steps_trained: 747252
    num_steps_sampled: 747252
    num_steps_trained: 747

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,374,10143.3,747252,8.8297,14.67,-0.06,92.38


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 749250
  custom_metrics: {}
  date: 2021-11-09_06-34-51
  done: false
  episode_len_mean: 92.94
  episode_media: {}
  episode_reward_max: 14.670000000000014
  episode_reward_mean: 8.748100000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 7214
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484374999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.313187038898468
          entropy_coeff: 0.009999999999999998
          kl: 0.009939969089122798
          policy_loss: -0.03514497179892801
          total_loss: 0.05038605035238323
          vf_explained_var: 0.9836385250091553
          vf_loss: 0.09441705057840971
    num_agent_steps_sampled: 749250
    num_agent_steps_trained: 749250
    num_steps_sampled: 749250
    num_steps_trained: 7492

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,375,10168.1,749250,8.7481,14.67,-0.06,92.94


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 751248
  custom_metrics: {}
  date: 2021-11-09_06-35-18
  done: false
  episode_len_mean: 94.56
  episode_media: {}
  episode_reward_max: 14.670000000000014
  episode_reward_mean: 8.691100000000016
  episode_reward_min: -0.05
  episodes_this_iter: 21
  episodes_total: 7235
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484374999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.1845461079052517
          entropy_coeff: 0.009999999999999998
          kl: 0.01603909516719592
          policy_loss: -0.06314484592349756
          total_loss: 0.10886140570399307
          vf_explained_var: 0.973111093044281
          vf_loss: 0.17700063674932434
    num_agent_steps_sampled: 751248
    num_agent_steps_trained: 751248
    num_steps_sampled: 751248
    num_steps_trained: 751248
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,376,10195,751248,8.6911,14.67,-0.05,94.56


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 753246
  custom_metrics: {}
  date: 2021-11-09_06-35-45
  done: false
  episode_len_mean: 95.79
  episode_media: {}
  episode_reward_max: 14.620000000000013
  episode_reward_mean: 8.592100000000018
  episode_reward_min: 2.3700000000000268
  episodes_this_iter: 21
  episodes_total: 7256
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484374999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.2728246978351048
          entropy_coeff: 0.009999999999999998
          kl: 0.016430138143281765
          policy_loss: -0.016388282960369472
          total_loss: 0.1488633566136871
          vf_explained_var: 0.9764269590377808
          vf_loss: 0.17096178020749772
    num_agent_steps_sampled: 753246
    num_agent_steps_trained: 753246
    num_steps_sampled: 753246
    num_steps_trained: 753246

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,377,10221.8,753246,8.5921,14.62,2.37,95.79


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 755244
  custom_metrics: {}
  date: 2021-11-09_06-36-11
  done: false
  episode_len_mean: 95.88
  episode_media: {}
  episode_reward_max: 14.540000000000015
  episode_reward_mean: 8.475100000000017
  episode_reward_min: 3.080000000000012
  episodes_this_iter: 21
  episodes_total: 7277
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484374999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.2089885791142783
          entropy_coeff: 0.009999999999999998
          kl: 0.015958141571848904
          policy_loss: -0.004467076488903591
          total_loss: 0.14268618862898577
          vf_explained_var: 0.9773911237716675
          vf_loss: 0.1524266545616445
    num_agent_steps_sampled: 755244
    num_agent_steps_trained: 755244
    num_steps_sampled: 755244
    num_steps_trained: 755244


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,378,10247.3,755244,8.4751,14.54,3.08,95.88


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 757242
  custom_metrics: {}
  date: 2021-11-09_06-36-37
  done: false
  episode_len_mean: 97.28
  episode_media: {}
  episode_reward_max: 14.500000000000016
  episode_reward_mean: 7.897100000000016
  episode_reward_min: 2.8000000000000127
  episodes_this_iter: 20
  episodes_total: 7297
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484374999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.343240771974836
          entropy_coeff: 0.009999999999999998
          kl: 0.010232922892055374
          policy_loss: -0.0460873024449462
          total_loss: 0.044809870599281224
          vf_explained_var: 0.9791662096977234
          vf_loss: 0.09995860395332178
    num_agent_steps_sampled: 757242
    num_agent_steps_trained: 757242
    num_steps_sampled: 757242
    num_steps_trained: 757242


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,379,10273.3,757242,7.8971,14.5,2.8,97.28


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 759240
  custom_metrics: {}
  date: 2021-11-09_06-37-03
  done: false
  episode_len_mean: 96.96
  episode_media: {}
  episode_reward_max: 14.640000000000015
  episode_reward_mean: 8.432100000000018
  episode_reward_min: 2.8000000000000127
  episodes_this_iter: 20
  episodes_total: 7317
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484374999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.2302439831552052
          entropy_coeff: 0.009999999999999998
          kl: 0.01065207100434588
          policy_loss: -0.02228027344903066
          total_loss: 0.09408361245656298
          vf_explained_var: 0.984835684299469
          vf_loss: 0.12411631076108842
    num_agent_steps_sampled: 759240
    num_agent_steps_trained: 759240
    num_steps_sampled: 759240
    num_steps_trained: 759240
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,380,10299.8,759240,8.4321,14.64,2.8,96.96


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 761238
  custom_metrics: {}
  date: 2021-11-09_06-37-28
  done: false
  episode_len_mean: 98.07
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 8.178200000000016
  episode_reward_min: 2.0500000000000282
  episodes_this_iter: 20
  episodes_total: 7337
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484374999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.247103609641393
          entropy_coeff: 0.009999999999999998
          kl: 0.02158451739597879
          policy_loss: 0.015039046498991194
          total_loss: 0.372407153907365
          vf_explained_var: 0.947981595993042
          vf_loss: 0.360619346602332
    num_agent_steps_sampled: 761238
    num_agent_steps_trained: 761238
    num_steps_sampled: 761238
    num_steps_trained: 761238
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,381,10324.3,761238,8.1782,14.71,2.05,98.07


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 763236
  custom_metrics: {}
  date: 2021-11-09_06-37-54
  done: false
  episode_len_mean: 98.58
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 8.025700000000016
  episode_reward_min: 2.0500000000000282
  episodes_this_iter: 21
  episodes_total: 7358
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3079442552157812
          entropy_coeff: 0.009999999999999998
          kl: 0.009959911795440278
          policy_loss: 0.021510059528407597
          total_loss: 0.1925450126862242
          vf_explained_var: 0.9657893776893616
          vf_loss: 0.1777328557379189
    num_agent_steps_sampled: 763236
    num_agent_steps_trained: 763236
    num_steps_sampled: 763236
    num_steps_trained: 763236
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,382,10350.3,763236,8.0257,14.71,2.05,98.58


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 765234
  custom_metrics: {}
  date: 2021-11-09_06-38-20
  done: false
  episode_len_mean: 98.0
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 8.146400000000018
  episode_reward_min: 2.0500000000000282
  episodes_this_iter: 20
  episodes_total: 7378
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.1413159228506542
          entropy_coeff: 0.009999999999999998
          kl: 0.01347899965224408
          policy_loss: -0.0030371525457927157
          total_loss: 0.30970134571017255
          vf_explained_var: 0.9570305943489075
          vf_loss: 0.31551535981042045
    num_agent_steps_sampled: 765234
    num_agent_steps_trained: 765234
    num_steps_sampled: 765234
    num_steps_trained: 765234

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,383,10376.8,765234,8.1464,14.71,2.05,98


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 767232
  custom_metrics: {}
  date: 2021-11-09_06-38-46
  done: false
  episode_len_mean: 97.69
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 8.638800000000018
  episode_reward_min: 2.0500000000000282
  episodes_this_iter: 21
  episodes_total: 7399
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.1261167375814347
          entropy_coeff: 0.009999999999999998
          kl: 0.013508707634771204
          policy_loss: -0.0075963673669667475
          total_loss: 0.1979995292744466
          vf_explained_var: 0.9682111740112305
          vf_loss: 0.20820172909824622
    num_agent_steps_sampled: 767232
    num_agent_steps_trained: 767232
    num_steps_sampled: 767232
    num_steps_trained: 76723

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,384,10402.9,767232,8.6388,14.71,2.05,97.69


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 769230
  custom_metrics: {}
  date: 2021-11-09_06-39-13
  done: false
  episode_len_mean: 96.73
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 8.500100000000018
  episode_reward_min: 2.0500000000000282
  episodes_this_iter: 21
  episodes_total: 7420
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.1682595244475773
          entropy_coeff: 0.009999999999999998
          kl: 0.009503856077193288
          policy_loss: -0.06489030032285623
          total_loss: 0.16573498440640314
          vf_explained_var: 0.9663606286048889
          vf_loss: 0.23621854453924157
    num_agent_steps_sampled: 769230
    num_agent_steps_trained: 769230
    num_steps_sampled: 769230
    num_steps_trained: 769230

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,385,10429.2,769230,8.5001,14.71,2.05,96.73


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 771228
  custom_metrics: {}
  date: 2021-11-09_06-39-39
  done: false
  episode_len_mean: 95.59
  episode_media: {}
  episode_reward_max: 14.550000000000015
  episode_reward_mean: 8.353600000000018
  episode_reward_min: 3.0100000000000158
  episodes_this_iter: 20
  episodes_total: 7440
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3737439655122303
          entropy_coeff: 0.009999999999999998
          kl: 0.017195603583422932
          policy_loss: -0.02254209919344811
          total_loss: 0.2659698437012377
          vf_explained_var: 0.9427317380905151
          vf_loss: 0.2912317724454971
    num_agent_steps_sampled: 771228
    num_agent_steps_trained: 771228
    num_steps_sampled: 771228
    num_steps_trained: 771228
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,386,10455.5,771228,8.3536,14.55,3.01,95.59


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 773226
  custom_metrics: {}
  date: 2021-11-09_06-40-05
  done: false
  episode_len_mean: 95.35
  episode_media: {}
  episode_reward_max: 14.550000000000015
  episode_reward_mean: 8.340600000000016
  episode_reward_min: 2.9000000000000123
  episodes_this_iter: 21
  episodes_total: 7461
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2918707858948482
          entropy_coeff: 0.009999999999999998
          kl: 0.009366963359914713
          policy_loss: -0.03605158574701775
          total_loss: 0.053647973585785144
          vf_explained_var: 0.9769076108932495
          vf_loss: 0.09661664266494058
    num_agent_steps_sampled: 773226
    num_agent_steps_trained: 773226
    num_steps_sampled: 773226
    num_steps_trained: 77322

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,387,10481.8,773226,8.3406,14.55,2.9,95.35




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 775224
  custom_metrics: {}
  date: 2021-11-09_06-40-45
  done: false
  episode_len_mean: 95.71
  episode_media: {}
  episode_reward_max: 14.550000000000017
  episode_reward_mean: 8.025900000000016
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 22
  episodes_total: 7483
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2752223304339818
          entropy_coeff: 0.009999999999999998
          kl: 0.010097018698131125
          policy_loss: -0.07199646268217337
          total_loss: 0.18523427038231777
          vf_explained_var: 0.9641606211662292
          vf_loss: 0.2635135681412759
    num_agent_steps_sampled: 775224
    num_agent_steps_trained: 775224
    num_steps_sampled: 775224
    num_steps_trained: 7752

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,388,10521,775224,8.0259,14.55,-0.06,95.71




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 777222
  custom_metrics: {}
  date: 2021-11-09_06-41-42
  done: false
  episode_len_mean: 92.58
  episode_media: {}
  episode_reward_max: 14.550000000000017
  episode_reward_mean: 7.804200000000016
  episode_reward_min: -0.7400000000000007
  episodes_this_iter: 24
  episodes_total: 7507
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2135515814735776
          entropy_coeff: 0.009999999999999998
          kl: 0.017605658452186434
          policy_loss: 0.0004860818031288329
          total_loss: 0.636663262390842
          vf_explained_var: 0.8875007033348083
          vf_loss: 0.6370323502946468
    num_agent_steps_sampled: 777222
    num_agent_steps_trained: 777222
    num_steps_sampled: 777222
    num_steps_trained: 777222


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,389,10578.6,777222,7.8042,14.55,-0.74,92.58


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 779220
  custom_metrics: {}
  date: 2021-11-09_06-42-09
  done: false
  episode_len_mean: 92.27
  episode_media: {}
  episode_reward_max: 14.580000000000016
  episode_reward_mean: 8.072700000000015
  episode_reward_min: -0.7400000000000007
  episodes_this_iter: 21
  episodes_total: 7528
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2448708406516484
          entropy_coeff: 0.009999999999999998
          kl: 0.008672441200102212
          policy_loss: -0.05684287461141745
          total_loss: 0.04345277500826688
          vf_explained_var: 0.9839825630187988
          vf_loss: 0.10718772831772055
    num_agent_steps_sampled: 779220
    num_agent_steps_trained: 779220
    num_steps_sampled: 779220
    num_steps_trained: 77922

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,390,10605.1,779220,8.0727,14.58,-0.74,92.27


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 781218
  custom_metrics: {}
  date: 2021-11-09_06-42-35
  done: false
  episode_len_mean: 92.12
  episode_media: {}
  episode_reward_max: 14.630000000000015
  episode_reward_mean: 8.028500000000015
  episode_reward_min: -0.7400000000000007
  episodes_this_iter: 20
  episodes_total: 7548
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3503769318262735
          entropy_coeff: 0.009999999999999998
          kl: 0.00726591211437631
          policy_loss: -0.02518760391644069
          total_loss: 0.0494277438326251
          vf_explained_var: 0.9833489060401917
          vf_loss: 0.08346368397275607
    num_agent_steps_sampled: 781218
    num_agent_steps_trained: 781218
    num_steps_sampled: 781218
    num_steps_trained: 781218


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,391,10631.1,781218,8.0285,14.63,-0.74,92.12


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 783216
  custom_metrics: {}
  date: 2021-11-09_06-43-01
  done: false
  episode_len_mean: 92.49
  episode_media: {}
  episode_reward_max: 14.630000000000015
  episode_reward_mean: 8.114500000000016
  episode_reward_min: -0.7400000000000007
  episodes_this_iter: 21
  episodes_total: 7569
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3146726148469108
          entropy_coeff: 0.009999999999999998
          kl: 0.010011920091866947
          policy_loss: -0.06417558599440824
          total_loss: 0.09789513211165156
          vf_explained_var: 0.9764108061790466
          vf_loss: 0.1688025795278095
    num_agent_steps_sampled: 783216
    num_agent_steps_trained: 783216
    num_steps_sampled: 783216
    num_steps_trained: 783216

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,392,10657.2,783216,8.1145,14.63,-0.74,92.49


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 785214
  custom_metrics: {}
  date: 2021-11-09_06-43-27
  done: false
  episode_len_mean: 94.77
  episode_media: {}
  episode_reward_max: 14.630000000000015
  episode_reward_mean: 8.243800000000016
  episode_reward_min: -0.7400000000000007
  episodes_this_iter: 20
  episodes_total: 7589
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2181054938407172
          entropy_coeff: 0.009999999999999998
          kl: 0.009479325970725807
          policy_loss: -0.05786646178790501
          total_loss: 0.04466936026832887
          vf_explained_var: 0.9811521768569946
          vf_loss: 0.10864325924998237
    num_agent_steps_sampled: 785214
    num_agent_steps_trained: 785214
    num_steps_sampled: 785214
    num_steps_trained: 78521

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,393,10683,785214,8.2438,14.63,-0.74,94.77


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 787212
  custom_metrics: {}
  date: 2021-11-09_06-43-54
  done: false
  episode_len_mean: 95.85
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 8.462300000000017
  episode_reward_min: 2.330000000000018
  episodes_this_iter: 23
  episodes_total: 7612
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2461446529343014
          entropy_coeff: 0.009999999999999998
          kl: 0.010156137724948928
          policy_loss: -0.03961278657828059
          total_loss: 0.0742620383699735
          vf_explained_var: 0.9844581484794617
          vf_loss: 0.11982900395634628
    num_agent_steps_sampled: 787212
    num_agent_steps_trained: 787212
    num_steps_sampled: 787212
    num_steps_trained: 787212
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,394,10710.2,787212,8.4623,14.71,2.33,95.85


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 789210
  custom_metrics: {}
  date: 2021-11-09_06-44-20
  done: false
  episode_len_mean: 96.48
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 8.294900000000018
  episode_reward_min: 2.330000000000018
  episodes_this_iter: 20
  episodes_total: 7632
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2496220577330817
          entropy_coeff: 0.009999999999999998
          kl: 0.008829641443044878
          policy_loss: -0.04183430994550387
          total_loss: 0.09868319304216476
          vf_explained_var: 0.9777666330337524
          vf_loss: 0.14735637406508129
    num_agent_steps_sampled: 789210
    num_agent_steps_trained: 789210
    num_steps_sampled: 789210
    num_steps_trained: 789210


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,395,10736.4,789210,8.2949,14.71,2.33,96.48


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 791208
  custom_metrics: {}
  date: 2021-11-09_06-44-46
  done: false
  episode_len_mean: 96.22
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 8.136200000000017
  episode_reward_min: 2.330000000000018
  episodes_this_iter: 21
  episodes_total: 7653
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2133542083558582
          entropy_coeff: 0.009999999999999998
          kl: 0.010805565424032252
          policy_loss: 0.0008126323599190939
          total_loss: 0.12000093409525496
          vf_explained_var: 0.9755093455314636
          vf_loss: 0.12439847209801276
    num_agent_steps_sampled: 791208
    num_agent_steps_trained: 791208
    num_steps_sampled: 791208
    num_steps_trained: 791208

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,396,10762.6,791208,8.1362,14.71,2.33,96.22


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 793206
  custom_metrics: {}
  date: 2021-11-09_06-45-13
  done: false
  episode_len_mean: 95.94
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 8.190200000000015
  episode_reward_min: 3.2100000000000106
  episodes_this_iter: 20
  episodes_total: 7673
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2578070328349158
          entropy_coeff: 0.009999999999999998
          kl: 0.00980991599676613
          policy_loss: 0.015582263239082836
          total_loss: 0.12055804723252853
          vf_explained_var: 0.9836856126785278
          vf_loss: 0.11126841730659916
    num_agent_steps_sampled: 793206
    num_agent_steps_trained: 793206
    num_steps_sampled: 793206
    num_steps_trained: 793206


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,397,10788.9,793206,8.1902,14.72,3.21,95.94


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 795204
  custom_metrics: {}
  date: 2021-11-09_06-45-38
  done: false
  episode_len_mean: 95.93
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 7.900000000000017
  episode_reward_min: 2.730000000000016
  episodes_this_iter: 21
  episodes_total: 7694
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.280493706748599
          entropy_coeff: 0.009999999999999998
          kl: 0.01705740267387792
          policy_loss: -0.07840974405407905
          total_loss: 0.1563767411258249
          vf_explained_var: 0.9467495679855347
          vf_loss: 0.23666235502986682
    num_agent_steps_sampled: 795204
    num_agent_steps_trained: 795204
    num_steps_sampled: 795204
    num_steps_trained: 795204
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,398,10814.5,795204,7.9,14.72,2.73,95.93


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 797202
  custom_metrics: {}
  date: 2021-11-09_06-46-04
  done: false
  episode_len_mean: 97.5
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 8.202300000000019
  episode_reward_min: 2.730000000000016
  episodes_this_iter: 20
  episodes_total: 7714
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2619742631912232
          entropy_coeff: 0.009999999999999998
          kl: 0.01052953031928588
          policy_loss: -0.008666105195879936
          total_loss: 0.17466096923287425
          vf_explained_var: 0.9751268029212952
          vf_loss: 0.1892003051581837
    num_agent_steps_sampled: 797202
    num_agent_steps_trained: 797202
    num_steps_sampled: 797202
    num_steps_trained: 797202
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,399,10840.4,797202,8.2023,14.72,2.73,97.5


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 799200
  custom_metrics: {}
  date: 2021-11-09_06-46-29
  done: false
  episode_len_mean: 98.3
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 8.387100000000016
  episode_reward_min: 2.730000000000016
  episodes_this_iter: 19
  episodes_total: 7733
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.215387877963838
          entropy_coeff: 0.009999999999999998
          kl: 0.01218902167666937
          policy_loss: -0.031694786365897884
          total_loss: 0.047141453570553236
          vf_explained_var: 0.9869619011878967
          vf_loss: 0.08318033588251897
    num_agent_steps_sampled: 799200
    num_agent_steps_trained: 799200
    num_steps_sampled: 799200
    num_steps_trained: 799200
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,400,10865.3,799200,8.3871,14.72,2.73,98.3


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 801198
  custom_metrics: {}
  date: 2021-11-09_06-46-54
  done: false
  episode_len_mean: 99.38
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 8.476900000000018
  episode_reward_min: 2.730000000000016
  episodes_this_iter: 19
  episodes_total: 7752
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2580809258279346
          entropy_coeff: 0.009999999999999998
          kl: 0.018079000780740786
          policy_loss: -0.01963418908417225
          total_loss: 0.16433963357870068
          vf_explained_var: 0.9764898419380188
          vf_loss: 0.18497100733220578
    num_agent_steps_sampled: 801198
    num_agent_steps_trained: 801198
    num_steps_sampled: 801198
    num_steps_trained: 801198


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,401,10890.5,801198,8.4769,14.72,2.73,99.38


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 803196
  custom_metrics: {}
  date: 2021-11-09_06-47-20
  done: false
  episode_len_mean: 100.28
  episode_media: {}
  episode_reward_max: 14.580000000000016
  episode_reward_mean: 8.479600000000017
  episode_reward_min: 2.730000000000016
  episodes_this_iter: 20
  episodes_total: 7772
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2938916688873654
          entropy_coeff: 0.009999999999999998
          kl: 0.009681411585500975
          policy_loss: -0.04585397346388726
          total_loss: 0.04602900356763885
          vf_explained_var: 0.9816805124282837
          vf_loss: 0.0986187950264485
    num_agent_steps_sampled: 803196
    num_agent_steps_trained: 803196
    num_steps_sampled: 803196
    num_steps_trained: 803196


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,402,10915.5,803196,8.4796,14.58,2.73,100.28


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 805194
  custom_metrics: {}
  date: 2021-11-09_06-47-44
  done: false
  episode_len_mean: 101.17
  episode_media: {}
  episode_reward_max: 14.580000000000016
  episode_reward_mean: 8.455600000000018
  episode_reward_min: 3.010000000000013
  episodes_this_iter: 20
  episodes_total: 7792
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3185978787285941
          entropy_coeff: 0.009999999999999998
          kl: 0.011115792638237264
          policy_loss: -0.041504238882944695
          total_loss: 0.05638243383949711
          vf_explained_var: 0.9778459668159485
          vf_loss: 0.10395050944671744
    num_agent_steps_sampled: 805194
    num_agent_steps_trained: 805194
    num_steps_sampled: 805194
    num_steps_trained: 80519

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,403,10940.3,805194,8.4556,14.58,3.01,101.17


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 807192
  custom_metrics: {}
  date: 2021-11-09_06-48-09
  done: false
  episode_len_mean: 101.25
  episode_media: {}
  episode_reward_max: 14.630000000000015
  episode_reward_mean: 8.411100000000017
  episode_reward_min: 2.740000000000014
  episodes_this_iter: 20
  episodes_total: 7812
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2801753242810567
          entropy_coeff: 0.009999999999999998
          kl: 0.012990378853131038
          policy_loss: -0.025966522700729824
          total_loss: 0.13356261902621813
          vf_explained_var: 0.9745162129402161
          vf_loss: 0.1640076658378045
    num_agent_steps_sampled: 807192
    num_agent_steps_trained: 807192
    num_steps_sampled: 807192
    num_steps_trained: 807192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,404,10965.3,807192,8.4111,14.63,2.74,101.25




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 809190
  custom_metrics: {}
  date: 2021-11-09_06-48-50
  done: false
  episode_len_mean: 100.64
  episode_media: {}
  episode_reward_max: 14.630000000000015
  episode_reward_mean: 8.279000000000018
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 7832
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.280193333398728
          entropy_coeff: 0.009999999999999998
          kl: 0.01349773338481632
          policy_loss: 0.000969470664858818
          total_loss: 0.4693957239433768
          vf_explained_var: 0.9432649612426758
          vf_loss: 0.47257987654634886
    num_agent_steps_sampled: 809190
    num_agent_steps_trained: 809190
    num_steps_sampled: 809190
    num_steps_trained: 80919

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,405,11005.8,809190,8.279,14.63,-0.06,100.64




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 811188
  custom_metrics: {}
  date: 2021-11-09_06-49-33
  done: false
  episode_len_mean: 98.91
  episode_media: {}
  episode_reward_max: 14.650000000000015
  episode_reward_mean: 8.384100000000018
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 22
  episodes_total: 7854
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.228617787361145
          entropy_coeff: 0.009999999999999998
          kl: 0.01376866967884405
          policy_loss: -0.018561021770749773
          total_loss: 0.1489789008384659
          vf_explained_var: 0.9749252796173096
          vf_loss: 0.17100420218138468
    num_agent_steps_sampled: 811188
    num_agent_steps_trained: 811188
    num_steps_sampled: 811188
    num_steps_trained: 81118

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,406,11048.5,811188,8.3841,14.65,-0.06,98.91


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 813186
  custom_metrics: {}
  date: 2021-11-09_06-49-59
  done: false
  episode_len_mean: 99.26
  episode_media: {}
  episode_reward_max: 14.650000000000015
  episode_reward_mean: 8.539100000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 7873
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.299733733563196
          entropy_coeff: 0.009999999999999998
          kl: 0.011082374084094604
          policy_loss: -0.04705457086009639
          total_loss: 0.108003790703203
          vf_explained_var: 0.9779105186462402
          vf_loss: 0.16095497044069426
    num_agent_steps_sampled: 813186
    num_agent_steps_trained: 813186
    num_steps_sampled: 813186
    num_steps_trained: 813186

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,407,11074.5,813186,8.5391,14.65,-0.06,99.26


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 815184
  custom_metrics: {}
  date: 2021-11-09_06-50-25
  done: false
  episode_len_mean: 98.29
  episode_media: {}
  episode_reward_max: 14.650000000000015
  episode_reward_mean: 8.935200000000016
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 7894
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2366269117309934
          entropy_coeff: 0.009999999999999998
          kl: 0.007872224169970618
          policy_loss: -0.09762081010710626
          total_loss: -0.017524908917645615
          vf_explained_var: 0.9901213049888611
          vf_loss: 0.0874182596714014
    num_agent_steps_sampled: 815184
    num_agent_steps_trained: 815184
    num_steps_sampled: 815184
    num_steps_trained: 81

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,408,11100.7,815184,8.9352,14.65,-0.06,98.29


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 817182
  custom_metrics: {}
  date: 2021-11-09_06-50-50
  done: false
  episode_len_mean: 98.32
  episode_media: {}
  episode_reward_max: 14.650000000000015
  episode_reward_mean: 8.935400000000019
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 7914
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2799547428176516
          entropy_coeff: 0.009999999999999998
          kl: 0.009670664596072078
          policy_loss: -0.017977344147151426
          total_loss: 0.12504560759379751
          vf_explained_var: 0.9803187251091003
          vf_loss: 0.14962628560052033
    num_agent_steps_sampled: 817182
    num_agent_steps_trained: 817182
    num_steps_sampled: 817182
    num_steps_trained: 81

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,409,11125.3,817182,8.9354,14.65,-0.06,98.32


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 819180
  custom_metrics: {}
  date: 2021-11-09_06-51-16
  done: false
  episode_len_mean: 98.92
  episode_media: {}
  episode_reward_max: 14.650000000000015
  episode_reward_mean: 8.673500000000017
  episode_reward_min: -0.05
  episodes_this_iter: 20
  episodes_total: 7934
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.38279067221142
          entropy_coeff: 0.009999999999999998
          kl: 0.008670273432954503
          policy_loss: -0.05139267877453849
          total_loss: 0.019913162610360555
          vf_explained_var: 0.9834539294242859
          vf_loss: 0.07957850828589429
    num_agent_steps_sampled: 819180
    num_agent_steps_trained: 819180
    num_steps_sampled: 819180
    num_steps_trained: 819180
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,410,11151.3,819180,8.6735,14.65,-0.05,98.92


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 821178
  custom_metrics: {}
  date: 2021-11-09_06-51-40
  done: false
  episode_len_mean: 101.45
  episode_media: {}
  episode_reward_max: 14.650000000000015
  episode_reward_mean: 8.664400000000018
  episode_reward_min: 2.8000000000000154
  episodes_this_iter: 18
  episodes_total: 7952
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2979766039621263
          entropy_coeff: 0.009999999999999998
          kl: 0.011813193432445619
          policy_loss: -0.0272248621852625
          total_loss: 0.0611528115169633
          vf_explained_var: 0.9824846386909485
          vf_loss: 0.09378845755543028
    num_agent_steps_sampled: 821178
    num_agent_steps_trained: 821178
    num_steps_sampled: 821178
    num_steps_trained: 821178


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,411,11175.1,821178,8.6644,14.65,2.8,101.45


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 823176
  custom_metrics: {}
  date: 2021-11-09_06-52-03
  done: false
  episode_len_mean: 101.68
  episode_media: {}
  episode_reward_max: 14.650000000000015
  episode_reward_mean: 8.506600000000018
  episode_reward_min: 2.920000000000017
  episodes_this_iter: 19
  episodes_total: 7971
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.4076535065968832
          entropy_coeff: 0.009999999999999998
          kl: 0.010901037887165578
          policy_loss: -0.027600756252095814
          total_loss: 0.12173567596113398
          vf_explained_var: 0.9529616832733154
          vf_loss: 0.15642842641543775
    num_agent_steps_sampled: 823176
    num_agent_steps_trained: 823176
    num_steps_sampled: 823176
    num_steps_trained: 82317

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,412,11198.7,823176,8.5066,14.65,2.92,101.68


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 825174
  custom_metrics: {}
  date: 2021-11-09_06-52-28
  done: false
  episode_len_mean: 102.44
  episode_media: {}
  episode_reward_max: 14.650000000000015
  episode_reward_mean: 8.323900000000018
  episode_reward_min: 2.480000000000018
  episodes_this_iter: 18
  episodes_total: 7989
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2638321785699753
          entropy_coeff: 0.009999999999999998
          kl: 0.008400330113862878
          policy_loss: -0.007002430972421453
          total_loss: 0.07941056350689558
          vf_explained_var: 0.9797884821891785
          vf_loss: 0.09366903441647688
    num_agent_steps_sampled: 825174
    num_agent_steps_trained: 825174
    num_steps_sampled: 825174
    num_steps_trained: 82517

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,413,11223.4,825174,8.3239,14.65,2.48,102.44


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 827172
  custom_metrics: {}
  date: 2021-11-09_06-52-52
  done: false
  episode_len_mean: 104.13
  episode_media: {}
  episode_reward_max: 14.610000000000015
  episode_reward_mean: 8.14640000000002
  episode_reward_min: 2.480000000000018
  episodes_this_iter: 21
  episodes_total: 8010
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.1841094468321118
          entropy_coeff: 0.009999999999999998
          kl: 0.015791900808864816
          policy_loss: 0.01256749461449328
          total_loss: 0.24537519751826212
          vf_explained_var: 0.9734530448913574
          vf_loss: 0.23453056964845884
    num_agent_steps_sampled: 827172
    num_agent_steps_trained: 827172
    num_steps_sampled: 827172
    num_steps_trained: 827172
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,414,11248,827172,8.1464,14.61,2.48,104.13


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 829170
  custom_metrics: {}
  date: 2021-11-09_06-53-17
  done: false
  episode_len_mean: 104.54
  episode_media: {}
  episode_reward_max: 14.610000000000015
  episode_reward_mean: 8.626200000000019
  episode_reward_min: 2.480000000000018
  episodes_this_iter: 18
  episodes_total: 8028
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3123393881888616
          entropy_coeff: 0.009999999999999998
          kl: 0.012713521346719723
          policy_loss: -0.03468669963379701
          total_loss: 0.19047703651622647
          vf_explained_var: 0.9542067646980286
          vf_loss: 0.23014128963862146
    num_agent_steps_sampled: 829170
    num_agent_steps_trained: 829170
    num_steps_sampled: 829170
    num_steps_trained: 829170

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,415,11272,829170,8.6262,14.61,2.48,104.54


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 831168
  custom_metrics: {}
  date: 2021-11-09_06-53-41
  done: false
  episode_len_mean: 104.9
  episode_media: {}
  episode_reward_max: 14.610000000000015
  episode_reward_mean: 8.299500000000021
  episode_reward_min: 2.480000000000018
  episodes_this_iter: 20
  episodes_total: 8048
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.368442367939722
          entropy_coeff: 0.009999999999999998
          kl: 0.011503087752780632
          policy_loss: -0.016977501997635477
          total_loss: 0.0576338257817995
          vf_explained_var: 0.9809182286262512
          vf_loss: 0.08092546328192665
    num_agent_steps_sampled: 831168
    num_agent_steps_trained: 831168
    num_steps_sampled: 831168
    num_steps_trained: 831168
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,416,11296.5,831168,8.2995,14.61,2.48,104.9


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 833166
  custom_metrics: {}
  date: 2021-11-09_06-54-05
  done: false
  episode_len_mean: 105.35
  episode_media: {}
  episode_reward_max: 14.610000000000015
  episode_reward_mean: 8.61700000000002
  episode_reward_min: 2.480000000000018
  episodes_this_iter: 18
  episodes_total: 8066
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2639183895928519
          entropy_coeff: 0.009999999999999998
          kl: 0.010495470195156516
          policy_loss: -0.03882918863424233
          total_loss: 0.08171698105122362
          vf_explained_var: 0.9788516759872437
          vf_loss: 0.12646066573049342
    num_agent_steps_sampled: 833166
    num_agent_steps_trained: 833166
    num_steps_sampled: 833166
    num_steps_trained: 833166


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,417,11320.7,833166,8.617,14.61,2.48,105.35


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 835164
  custom_metrics: {}
  date: 2021-11-09_06-54-31
  done: false
  episode_len_mean: 104.28
  episode_media: {}
  episode_reward_max: 14.610000000000015
  episode_reward_mean: 8.706300000000018
  episode_reward_min: 2.690000000000021
  episodes_this_iter: 21
  episodes_total: 8087
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3064576552027747
          entropy_coeff: 0.009999999999999998
          kl: 0.010793478106858686
          policy_loss: -0.0004863884122598739
          total_loss: 0.11826232217607044
          vf_explained_var: 0.9777764678001404
          vf_loss: 0.1248976606875658
    num_agent_steps_sampled: 835164
    num_agent_steps_trained: 835164
    num_steps_sampled: 835164
    num_steps_trained: 83516

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,418,11346.2,835164,8.7063,14.61,2.69,104.28


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 837162
  custom_metrics: {}
  date: 2021-11-09_06-54-55
  done: false
  episode_len_mean: 105.41
  episode_media: {}
  episode_reward_max: 14.580000000000016
  episode_reward_mean: 8.56260000000002
  episode_reward_min: 2.690000000000021
  episodes_this_iter: 17
  episodes_total: 8104
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2993343551953633
          entropy_coeff: 0.009999999999999998
          kl: 0.013266224586929505
          policy_loss: -0.055401390480498476
          total_loss: 0.07014920947452387
          vf_explained_var: 0.9744377732276917
          vf_loss: 0.13004397341892832
    num_agent_steps_sampled: 837162
    num_agent_steps_trained: 837162
    num_steps_sampled: 837162
    num_steps_trained: 837162

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,419,11370.2,837162,8.5626,14.58,2.69,105.41


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 839160
  custom_metrics: {}
  date: 2021-11-09_06-55-17
  done: false
  episode_len_mean: 106.03
  episode_media: {}
  episode_reward_max: 14.580000000000016
  episode_reward_mean: 8.182000000000022
  episode_reward_min: 2.690000000000021
  episodes_this_iter: 18
  episodes_total: 8122
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3534541856674922
          entropy_coeff: 0.009999999999999998
          kl: 0.011876924432008383
          policy_loss: -0.031938826275013744
          total_loss: 0.09033903788243021
          vf_explained_var: 0.9702147841453552
          vf_loss: 0.12820259167679718
    num_agent_steps_sampled: 839160
    num_agent_steps_trained: 839160
    num_steps_sampled: 839160
    num_steps_trained: 83916

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,420,11392.8,839160,8.182,14.58,2.69,106.03


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 841158
  custom_metrics: {}
  date: 2021-11-09_06-55-43
  done: false
  episode_len_mean: 105.01
  episode_media: {}
  episode_reward_max: 14.630000000000015
  episode_reward_mean: 8.368400000000019
  episode_reward_min: 2.690000000000021
  episodes_this_iter: 20
  episodes_total: 8142
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3343083319209872
          entropy_coeff: 0.009999999999999998
          kl: 0.011040620727712813
          policy_loss: 0.013219636394864037
          total_loss: 0.1218098952568003
          vf_explained_var: 0.9804270267486572
          vf_loss: 0.11485936721520765
    num_agent_steps_sampled: 841158
    num_agent_steps_trained: 841158
    num_steps_sampled: 841158
    num_steps_trained: 841158


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,421,11418.5,841158,8.3684,14.63,2.69,105.01


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 843156
  custom_metrics: {}
  date: 2021-11-09_06-56-08
  done: false
  episode_len_mean: 103.21
  episode_media: {}
  episode_reward_max: 14.650000000000015
  episode_reward_mean: 8.524100000000018
  episode_reward_min: 2.690000000000021
  episodes_this_iter: 20
  episodes_total: 8162
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2740879626501174
          entropy_coeff: 0.009999999999999998
          kl: 0.013632453036077902
          policy_loss: -0.033308087129678046
          total_loss: 0.13723059195492948
          vf_explained_var: 0.9765623807907104
          vf_loss: 0.17454493624113854
    num_agent_steps_sampled: 843156
    num_agent_steps_trained: 843156
    num_steps_sampled: 843156
    num_steps_trained: 84315

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,422,11443.6,843156,8.5241,14.65,2.69,103.21




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 845154
  custom_metrics: {}
  date: 2021-11-09_06-56-51
  done: false
  episode_len_mean: 102.68
  episode_media: {}
  episode_reward_max: 14.650000000000015
  episode_reward_mean: 8.881300000000019
  episode_reward_min: 3.5100000000000224
  episodes_this_iter: 21
  episodes_total: 8183
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2350294056392852
          entropy_coeff: 0.009999999999999998
          kl: 0.009248174845749387
          policy_loss: -0.05160268202778839
          total_loss: 0.028123614192008973
          vf_explained_var: 0.9883455038070679
          vf_loss: 0.08615107635656992
    num_agent_steps_sampled: 845154
    num_agent_steps_trained: 845154
    num_steps_sampled: 845154
    num_steps_trained: 8451

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,423,11486.1,845154,8.8813,14.65,3.51,102.68


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 847152
  custom_metrics: {}
  date: 2021-11-09_06-57-16
  done: false
  episode_len_mean: 101.25
  episode_media: {}
  episode_reward_max: 14.650000000000015
  episode_reward_mean: 8.934400000000018
  episode_reward_min: 4.130000000000017
  episodes_this_iter: 20
  episodes_total: 8203
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2445553038801467
          entropy_coeff: 0.009999999999999998
          kl: 0.017085167369654628
          policy_loss: -0.03744042838613192
          total_loss: 0.18421849005279087
          vf_explained_var: 0.971994161605835
          vf_loss: 0.22315761744976043
    num_agent_steps_sampled: 847152
    num_agent_steps_trained: 847152
    num_steps_sampled: 847152
    num_steps_trained: 847152


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,424,11511.5,847152,8.9344,14.65,4.13,101.25




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 849150
  custom_metrics: {}
  date: 2021-11-09_06-57-57
  done: false
  episode_len_mean: 100.41
  episode_media: {}
  episode_reward_max: 14.650000000000015
  episode_reward_mean: 8.929700000000018
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 8222
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3041784746306284
          entropy_coeff: 0.009999999999999998
          kl: 0.013029803489814877
          policy_loss: -0.01962471501458259
          total_loss: 0.2063611042951899
          vf_explained_var: 0.9402035474777222
          vf_loss: 0.23067911137782393
    num_agent_steps_sampled: 849150
    num_agent_steps_trained: 849150
    num_steps_sampled: 849150
    num_steps_trained: 849

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,425,11551.8,849150,8.9297,14.65,-0.06,100.41


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 851148
  custom_metrics: {}
  date: 2021-11-09_06-58-26
  done: false
  episode_len_mean: 100.9
  episode_media: {}
  episode_reward_max: 14.650000000000015
  episode_reward_mean: 8.824600000000018
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 8241
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2948133724076407
          entropy_coeff: 0.009999999999999998
          kl: 0.011209170393913379
          policy_loss: -0.06652404630468005
          total_loss: 0.13513188254797742
          vf_explained_var: 0.9644115567207336
          vf_loss: 0.20742209135066894
    num_agent_steps_sampled: 851148
    num_agent_steps_trained: 851148
    num_steps_sampled: 851148
    num_steps_trained: 851

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,426,11581.1,851148,8.8246,14.65,-0.06,100.9


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 853146
  custom_metrics: {}
  date: 2021-11-09_06-58-51
  done: false
  episode_len_mean: 100.44
  episode_media: {}
  episode_reward_max: 14.630000000000015
  episode_reward_mean: 8.377700000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 8261
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.257682112285069
          entropy_coeff: 0.009999999999999998
          kl: 0.010802077875397623
          policy_loss: -0.018697171702626206
          total_loss: 0.1384780718634526
          vf_explained_var: 0.9639459252357483
          vf_loss: 0.16283092981293087
    num_agent_steps_sampled: 853146
    num_agent_steps_trained: 853146
    num_steps_sampled: 853146
    num_steps_trained: 853

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,427,11605.7,853146,8.3777,14.63,-0.06,100.44


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 855144
  custom_metrics: {}
  date: 2021-11-09_06-59-17
  done: false
  episode_len_mean: 100.91
  episode_media: {}
  episode_reward_max: 14.670000000000014
  episode_reward_mean: 7.935800000000018
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 8282
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2540054168019976
          entropy_coeff: 0.009999999999999998
          kl: 0.016466703702715062
          policy_loss: -0.05314311259204433
          total_loss: 0.43246833106414195
          vf_explained_var: 0.9183961153030396
          vf_loss: 0.48760090778980936
    num_agent_steps_sampled: 855144
    num_agent_steps_trained: 855144
    num_steps_sampled: 855144
    num_steps_trained: 85

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,428,11632.1,855144,7.9358,14.67,-0.06,100.91


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 857142
  custom_metrics: {}
  date: 2021-11-09_06-59-42
  done: false
  episode_len_mean: 100.66
  episode_media: {}
  episode_reward_max: 14.670000000000014
  episode_reward_mean: 7.644000000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 8302
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3736466498602005
          entropy_coeff: 0.009999999999999998
          kl: 0.012046689950730402
          policy_loss: -0.0406065560167744
          total_loss: 0.14135994128882884
          vf_explained_var: 0.9644603729248047
          vf_loss: 0.1879843771457672
    num_agent_steps_sampled: 857142
    num_agent_steps_trained: 857142
    num_steps_sampled: 857142
    num_steps_trained: 8571

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,429,11657.4,857142,7.644,14.67,-0.06,100.66


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 859140
  custom_metrics: {}
  date: 2021-11-09_07-00-08
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 14.670000000000014
  episode_reward_mean: 7.895300000000017
  episode_reward_min: 2.660000000000018
  episodes_this_iter: 20
  episodes_total: 8322
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3226481630688622
          entropy_coeff: 0.009999999999999998
          kl: 0.010547889684290842
          policy_loss: -0.0341452664562634
          total_loss: 0.13722724876410905
          vf_explained_var: 0.9684545397758484
          vf_loss: 0.17784072620173294
    num_agent_steps_sampled: 859140
    num_agent_steps_trained: 859140
    num_steps_sampled: 859140
    num_steps_trained: 859140
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,430,11682.8,859140,7.8953,14.67,2.66,100


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 861138
  custom_metrics: {}
  date: 2021-11-09_07-00-32
  done: false
  episode_len_mean: 99.54
  episode_media: {}
  episode_reward_max: 14.670000000000014
  episode_reward_mean: 8.275300000000017
  episode_reward_min: 2.660000000000018
  episodes_this_iter: 20
  episodes_total: 8342
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2728734731674194
          entropy_coeff: 0.009999999999999998
          kl: 0.01261414682110077
          policy_loss: -0.037955029770022346
          total_loss: 0.17698948079099258
          vf_explained_var: 0.9669911861419678
          vf_loss: 0.21959107763000896
    num_agent_steps_sampled: 861138
    num_agent_steps_trained: 861138
    num_steps_sampled: 861138
    num_steps_trained: 861138


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,431,11707,861138,8.2753,14.67,2.66,99.54


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 863136
  custom_metrics: {}
  date: 2021-11-09_07-00-58
  done: false
  episode_len_mean: 99.64
  episode_media: {}
  episode_reward_max: 14.670000000000014
  episode_reward_mean: 8.713000000000017
  episode_reward_min: 2.660000000000018
  episodes_this_iter: 20
  episodes_total: 8362
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2432118787651971
          entropy_coeff: 0.009999999999999998
          kl: 0.01322413916937298
          policy_loss: -0.02172368899697349
          total_loss: 0.17028282664804942
          vf_explained_var: 0.9657986164093018
          vf_loss: 0.19596562818402335
    num_agent_steps_sampled: 863136
    num_agent_steps_trained: 863136
    num_steps_sampled: 863136
    num_steps_trained: 863136
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,432,11733.1,863136,8.713,14.67,2.66,99.64


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 865134
  custom_metrics: {}
  date: 2021-11-09_07-01-22
  done: false
  episode_len_mean: 100.59
  episode_media: {}
  episode_reward_max: 14.680000000000014
  episode_reward_mean: 8.923800000000018
  episode_reward_min: 2.660000000000018
  episodes_this_iter: 19
  episodes_total: 8381
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2472948196388427
          entropy_coeff: 0.009999999999999998
          kl: 0.01186958005634111
          policy_loss: -0.03293538978766827
          total_loss: 0.15816703220563275
          vf_explained_var: 0.9772840142250061
          vf_loss: 0.19597026198392822
    num_agent_steps_sampled: 865134
    num_agent_steps_trained: 865134
    num_steps_sampled: 865134
    num_steps_trained: 865134


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,433,11757,865134,8.9238,14.68,2.66,100.59


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 867132
  custom_metrics: {}
  date: 2021-11-09_07-01-45
  done: false
  episode_len_mean: 102.91
  episode_media: {}
  episode_reward_max: 14.760000000000014
  episode_reward_mean: 8.895800000000019
  episode_reward_min: 4.060000000000022
  episodes_this_iter: 18
  episodes_total: 8399
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3193609498796008
          entropy_coeff: 0.009999999999999998
          kl: 0.010917068920198609
          policy_loss: -0.08016903385342587
          total_loss: 0.11310852020978927
          vf_explained_var: 0.9539571404457092
          vf_loss: 0.1994763530613411
    num_agent_steps_sampled: 867132
    num_agent_steps_trained: 867132
    num_steps_sampled: 867132
    num_steps_trained: 867132


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,434,11780.1,867132,8.8958,14.76,4.06,102.91


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 869130
  custom_metrics: {}
  date: 2021-11-09_07-02-11
  done: false
  episode_len_mean: 102.16
  episode_media: {}
  episode_reward_max: 14.760000000000014
  episode_reward_mean: 8.788700000000018
  episode_reward_min: 4.060000000000022
  episodes_this_iter: 21
  episodes_total: 8420
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3628725863638378
          entropy_coeff: 0.009999999999999998
          kl: 0.010978983998114498
          policy_loss: -0.04050708803392592
          total_loss: 0.06939573369565465
          vf_explained_var: 0.9782428741455078
          vf_loss: 0.11649706262562956
    num_agent_steps_sampled: 869130
    num_agent_steps_trained: 869130
    num_steps_sampled: 869130
    num_steps_trained: 869130

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,435,11806.1,869130,8.7887,14.76,4.06,102.16


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 871128
  custom_metrics: {}
  date: 2021-11-09_07-02-35
  done: false
  episode_len_mean: 103.14
  episode_media: {}
  episode_reward_max: 14.760000000000014
  episode_reward_mean: 8.683600000000018
  episode_reward_min: 1.9100000000000172
  episodes_this_iter: 18
  episodes_total: 8438
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.280241117307118
          entropy_coeff: 0.009999999999999998
          kl: 0.010514067952974307
          policy_loss: -0.062193199335819196
          total_loss: 0.12816350899991535
          vf_explained_var: 0.9486806988716125
          vf_loss: 0.1964225145322936
    num_agent_steps_sampled: 871128
    num_agent_steps_trained: 871128
    num_steps_sampled: 871128
    num_steps_trained: 871128

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,436,11830.2,871128,8.6836,14.76,1.91,103.14


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 873126
  custom_metrics: {}
  date: 2021-11-09_07-03-00
  done: false
  episode_len_mean: 103.93
  episode_media: {}
  episode_reward_max: 14.760000000000014
  episode_reward_mean: 8.572100000000018
  episode_reward_min: 1.9100000000000172
  episodes_this_iter: 20
  episodes_total: 8458
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2579022861662366
          entropy_coeff: 0.009999999999999998
          kl: 0.012505312282379338
          policy_loss: -0.0541421020314807
          total_loss: 0.12715768556864487
          vf_explained_var: 0.9772593975067139
          vf_loss: 0.18586637393704483
    num_agent_steps_sampled: 873126
    num_agent_steps_trained: 873126
    num_steps_sampled: 873126
    num_steps_trained: 873126

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,437,11854.9,873126,8.5721,14.76,1.91,103.93


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 875124
  custom_metrics: {}
  date: 2021-11-09_07-03-26
  done: false
  episode_len_mean: 102.31
  episode_media: {}
  episode_reward_max: 14.760000000000014
  episode_reward_mean: 8.45800000000002
  episode_reward_min: 1.9100000000000172
  episodes_this_iter: 21
  episodes_total: 8479
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.209130319811049
          entropy_coeff: 0.009999999999999998
          kl: 0.015486884591652754
          policy_loss: -0.027459038368293217
          total_loss: 0.1132349994477062
          vf_explained_var: 0.9722762107849121
          vf_loss: 0.14286253989807196
    num_agent_steps_sampled: 875124
    num_agent_steps_trained: 875124
    num_steps_sampled: 875124
    num_steps_trained: 875124


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,438,11880.7,875124,8.458,14.76,1.91,102.31


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 877122
  custom_metrics: {}
  date: 2021-11-09_07-03-52
  done: false
  episode_len_mean: 99.65
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 8.56180000000002
  episode_reward_min: 1.9100000000000172
  episodes_this_iter: 20
  episodes_total: 8499
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2996448017302014
          entropy_coeff: 0.009999999999999998
          kl: 0.014067974747396976
          policy_loss: -0.012072014081336202
          total_loss: 0.16946706748789264
          vf_explained_var: 0.9550136923789978
          vf_loss: 0.18552186029652754
    num_agent_steps_sampled: 877122
    num_agent_steps_trained: 877122
    num_steps_sampled: 877122
    num_steps_trained: 877122

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,439,11907,877122,8.5618,14.71,1.91,99.65


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 879120
  custom_metrics: {}
  date: 2021-11-09_07-04-17
  done: false
  episode_len_mean: 101.12
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 8.487000000000018
  episode_reward_min: 1.9100000000000172
  episodes_this_iter: 19
  episodes_total: 8518
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.335055118515378
          entropy_coeff: 0.009999999999999998
          kl: 0.009097738649791227
          policy_loss: -0.02138244781110968
          total_loss: 0.10030796584628877
          vf_explained_var: 0.9583998918533325
          vf_loss: 0.1292118385522848
    num_agent_steps_sampled: 879120
    num_agent_steps_trained: 879120
    num_steps_sampled: 879120
    num_steps_trained: 879120


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,440,11931.6,879120,8.487,14.71,1.91,101.12




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 881118
  custom_metrics: {}
  date: 2021-11-09_07-04-58
  done: false
  episode_len_mean: 99.48
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 8.430100000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 8539
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2362203822249458
          entropy_coeff: 0.009999999999999998
          kl: 0.011674692229468915
          policy_loss: -0.040860469373209134
          total_loss: 0.17876223039236808
          vf_explained_var: 0.972020149230957
          vf_loss: 0.22450466022959778
    num_agent_steps_sampled: 881118
    num_agent_steps_trained: 881118
    num_steps_sampled: 881118
    num_steps_trained: 881

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,441,11972.5,881118,8.4301,14.71,-0.06,99.48


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 883116
  custom_metrics: {}
  date: 2021-11-09_07-05-25
  done: false
  episode_len_mean: 98.82
  episode_media: {}
  episode_reward_max: 14.720000000000015
  episode_reward_mean: 8.316100000000016
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 8560
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2324049745287213
          entropy_coeff: 0.009999999999999998
          kl: 0.010695129676119632
          policy_loss: -0.015739337479074796
          total_loss: 0.10940273389929817
          vf_explained_var: 0.9821810126304626
          vf_loss: 0.13061351225312268
    num_agent_steps_sampled: 883116
    num_agent_steps_trained: 883116
    num_steps_sampled: 883116
    num_steps_trained: 88

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,442,11999.9,883116,8.3161,14.72,-0.06,98.82




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 885114
  custom_metrics: {}
  date: 2021-11-09_07-06-07
  done: false
  episode_len_mean: 97.59
  episode_media: {}
  episode_reward_max: 14.720000000000015
  episode_reward_mean: 8.195400000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 22
  episodes_total: 8582
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2602667956125169
          entropy_coeff: 0.009999999999999998
          kl: 0.00945308154882432
          policy_loss: -0.013000062906316348
          total_loss: 0.30182505347544236
          vf_explained_var: 0.8919237852096558
          vf_loss: 0.321370984152669
    num_agent_steps_sampled: 885114
    num_agent_steps_trained: 885114
    num_steps_sampled: 885114
    num_steps_trained: 88511

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,443,12041.4,885114,8.1954,14.72,-0.06,97.59


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 887112
  custom_metrics: {}
  date: 2021-11-09_07-06-31
  done: false
  episode_len_mean: 98.42
  episode_media: {}
  episode_reward_max: 14.720000000000015
  episode_reward_mean: 8.047600000000019
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 18
  episodes_total: 8600
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.25073219197137
          entropy_coeff: 0.009999999999999998
          kl: 0.011852083198262477
          policy_loss: -0.030283064856415704
          total_loss: 0.15210716026790794
          vf_explained_var: 0.9653869867324829
          vf_loss: 0.18730364626362211
    num_agent_steps_sampled: 887112
    num_agent_steps_trained: 887112
    num_steps_sampled: 887112
    num_steps_trained: 8871

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,444,12066,887112,8.0476,14.72,-0.06,98.42


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 889110
  custom_metrics: {}
  date: 2021-11-09_07-06-56
  done: false
  episode_len_mean: 98.63
  episode_media: {}
  episode_reward_max: 14.720000000000015
  episode_reward_mean: 8.141400000000019
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 8620
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2191545333181109
          entropy_coeff: 0.009999999999999998
          kl: 0.017001822656031423
          policy_loss: -0.04956815723507177
          total_loss: 0.3064838155305811
          vf_explained_var: 0.9333509206771851
          vf_loss: 0.3573500674217939
    num_agent_steps_sampled: 889110
    num_agent_steps_trained: 889110
    num_steps_sampled: 889110
    num_steps_trained: 88911

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,445,12090.8,889110,8.1414,14.72,-0.06,98.63


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 891108
  custom_metrics: {}
  date: 2021-11-09_07-07-23
  done: false
  episode_len_mean: 97.44
  episode_media: {}
  episode_reward_max: 14.760000000000014
  episode_reward_mean: 8.011400000000016
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 8640
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.258411929720924
          entropy_coeff: 0.009999999999999998
          kl: 0.012834079867406607
          policy_loss: -0.030112307926728612
          total_loss: 0.17415040818353494
          vf_explained_var: 0.9662184715270996
          vf_loss: 0.2086237496208577
    num_agent_steps_sampled: 891108
    num_agent_steps_trained: 891108
    num_steps_sampled: 891108
    num_steps_trained: 8911

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,446,12117.6,891108,8.0114,14.76,-0.06,97.44


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 893106
  custom_metrics: {}
  date: 2021-11-09_07-07-49
  done: false
  episode_len_mean: 98.39
  episode_media: {}
  episode_reward_max: 14.760000000000014
  episode_reward_mean: 7.559000000000018
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 8661
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2923941822279068
          entropy_coeff: 0.009999999999999998
          kl: 0.013779358388477686
          policy_loss: -0.04053081256293115
          total_loss: 0.25402345159756284
          vf_explained_var: 0.9576393961906433
          vf_loss: 0.29864945761149836
    num_agent_steps_sampled: 893106
    num_agent_steps_trained: 893106
    num_steps_sampled: 893106
    num_steps_trained: 893

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,447,12143.8,893106,7.559,14.76,-0.06,98.39


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 895104
  custom_metrics: {}
  date: 2021-11-09_07-08-15
  done: false
  episode_len_mean: 98.94
  episode_media: {}
  episode_reward_max: 14.760000000000014
  episode_reward_mean: 7.878600000000017
  episode_reward_min: 1.340000000000003
  episodes_this_iter: 21
  episodes_total: 8682
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.189447611002695
          entropy_coeff: 0.009999999999999998
          kl: 0.008051635497841606
          policy_loss: -0.09900842423417738
          total_loss: 0.02175944031083158
          vf_explained_var: 0.9813254475593567
          vf_loss: 0.12750347659346603
    num_agent_steps_sampled: 895104
    num_agent_steps_trained: 895104
    num_steps_sampled: 895104
    num_steps_trained: 895104
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,448,12169.9,895104,7.8786,14.76,1.34,98.94


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 897102
  custom_metrics: {}
  date: 2021-11-09_07-08-42
  done: false
  episode_len_mean: 96.85
  episode_media: {}
  episode_reward_max: 14.760000000000014
  episode_reward_mean: 7.723600000000017
  episode_reward_min: 1.340000000000003
  episodes_this_iter: 21
  episodes_total: 8703
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2226425403640384
          entropy_coeff: 0.009999999999999998
          kl: 0.011748772417506291
          policy_loss: -0.04146283818852334
          total_loss: 0.12973320086797077
          vf_explained_var: 0.9691489338874817
          vf_loss: 0.17589475996792317
    num_agent_steps_sampled: 897102
    num_agent_steps_trained: 897102
    num_steps_sampled: 897102
    num_steps_trained: 897102


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,449,12196.3,897102,7.7236,14.76,1.34,96.85


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 899100
  custom_metrics: {}
  date: 2021-11-09_07-09-09
  done: false
  episode_len_mean: 95.28
  episode_media: {}
  episode_reward_max: 14.670000000000014
  episode_reward_mean: 7.790100000000017
  episode_reward_min: 1.340000000000003
  episodes_this_iter: 22
  episodes_total: 8725
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.0954819866589138
          entropy_coeff: 0.009999999999999998
          kl: 0.013786336502156903
          policy_loss: -0.010771489852950686
          total_loss: 0.23824832937014953
          vf_explained_var: 0.9678673148155212
          vf_loss: 0.2511414216742629
    num_agent_steps_sampled: 899100
    num_agent_steps_trained: 899100
    num_steps_sampled: 899100
    num_steps_trained: 899100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,450,12223.4,899100,7.7901,14.67,1.34,95.28


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 901098
  custom_metrics: {}
  date: 2021-11-09_07-09-36
  done: false
  episode_len_mean: 96.04
  episode_media: {}
  episode_reward_max: 14.650000000000015
  episode_reward_mean: 7.811300000000017
  episode_reward_min: 1.0000000000000109
  episodes_this_iter: 20
  episodes_total: 8745
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3095058106240771
          entropy_coeff: 0.009999999999999998
          kl: 0.009932000174637296
          policy_loss: -0.06973158429775919
          total_loss: 0.1346161619360958
          vf_explained_var: 0.965749979019165
          vf_loss: 0.21107914749355544
    num_agent_steps_sampled: 901098
    num_agent_steps_trained: 901098
    num_steps_sampled: 901098
    num_steps_trained: 901098
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,451,12250.2,901098,7.8113,14.65,1,96.04


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 903096
  custom_metrics: {}
  date: 2021-11-09_07-10-00
  done: false
  episode_len_mean: 95.88
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 8.045200000000017
  episode_reward_min: 1.0000000000000109
  episodes_this_iter: 21
  episodes_total: 8766
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2735247294108072
          entropy_coeff: 0.009999999999999998
          kl: 0.008436537963763886
          policy_loss: -0.03036355244971457
          total_loss: 0.1281750268701996
          vf_explained_var: 0.9606048464775085
          vf_loss: 0.16586834607379777
    num_agent_steps_sampled: 903096
    num_agent_steps_trained: 903096
    num_steps_sampled: 903096
    num_steps_trained: 903096


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,452,12274.7,903096,8.0452,14.72,1,95.88


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 905094
  custom_metrics: {}
  date: 2021-11-09_07-10-26
  done: false
  episode_len_mean: 95.87
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 7.837700000000017
  episode_reward_min: 1.0000000000000109
  episodes_this_iter: 20
  episodes_total: 8786
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.233112573055994
          entropy_coeff: 0.009999999999999998
          kl: 0.009657329804593691
          policy_loss: -0.01062761577112334
          total_loss: 0.14197863481406656
          vf_explained_var: 0.9655855894088745
          vf_loss: 0.1587497065109866
    num_agent_steps_sampled: 905094
    num_agent_steps_trained: 905094
    num_steps_sampled: 905094
    num_steps_trained: 905094
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,453,12300.5,905094,7.8377,14.72,1,95.87


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 907092
  custom_metrics: {}
  date: 2021-11-09_07-10-52
  done: false
  episode_len_mean: 96.71
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 8.331500000000016
  episode_reward_min: 1.0000000000000109
  episodes_this_iter: 21
  episodes_total: 8807
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.242469839255015
          entropy_coeff: 0.009999999999999998
          kl: 0.01234919041399996
          policy_loss: -0.010385881949748313
          total_loss: 0.21443969499142396
          vf_explained_var: 0.9695245623588562
          vf_loss: 0.22933786949586302
    num_agent_steps_sampled: 907092
    num_agent_steps_trained: 907092
    num_steps_sampled: 907092
    num_steps_trained: 907092


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,454,12326.3,907092,8.3315,14.72,1,96.71


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 909090
  custom_metrics: {}
  date: 2021-11-09_07-11-18
  done: false
  episode_len_mean: 97.81
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 8.257900000000017
  episode_reward_min: 1.0000000000000109
  episodes_this_iter: 20
  episodes_total: 8827
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.1929037255900248
          entropy_coeff: 0.009999999999999998
          kl: 0.01242976166139767
          policy_loss: -0.04095038979181222
          total_loss: 0.16738361674139188
          vf_explained_var: 0.9690564870834351
          vf_loss: 0.21229901475210985
    num_agent_steps_sampled: 909090
    num_agent_steps_trained: 909090
    num_steps_sampled: 909090
    num_steps_trained: 909090


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,455,12352.1,909090,8.2579,14.72,1,97.81


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 911088
  custom_metrics: {}
  date: 2021-11-09_07-11-44
  done: false
  episode_len_mean: 98.61
  episode_media: {}
  episode_reward_max: 14.670000000000014
  episode_reward_mean: 8.287900000000016
  episode_reward_min: 2.750000000000013
  episodes_this_iter: 20
  episodes_total: 8847
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3321716399419876
          entropy_coeff: 0.009999999999999998
          kl: 0.012262827680587843
          policy_loss: -0.0029142489213319052
          total_loss: 0.24688736148001184
          vf_explained_var: 0.9389634132385254
          vf_loss: 0.2552662567013786
    num_agent_steps_sampled: 911088
    num_agent_steps_trained: 911088
    num_steps_sampled: 911088
    num_steps_trained: 911088

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,456,12377.8,911088,8.2879,14.67,2.75,98.61


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 913086
  custom_metrics: {}
  date: 2021-11-09_07-12-11
  done: false
  episode_len_mean: 97.82
  episode_media: {}
  episode_reward_max: 14.670000000000014
  episode_reward_mean: 8.986300000000018
  episode_reward_min: 2.750000000000013
  episodes_this_iter: 21
  episodes_total: 8868
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.1542237826756068
          entropy_coeff: 0.009999999999999998
          kl: 0.009669094596819201
          policy_loss: -0.06771869118369761
          total_loss: 0.07198279750134264
          vf_explained_var: 0.987320601940155
          vf_loss: 0.14504851996898652
    num_agent_steps_sampled: 913086
    num_agent_steps_trained: 913086
    num_steps_sampled: 913086
    num_steps_trained: 913086
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,457,12405.4,913086,8.9863,14.67,2.75,97.82




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 915084
  custom_metrics: {}
  date: 2021-11-09_07-12-52
  done: false
  episode_len_mean: 97.03
  episode_media: {}
  episode_reward_max: 14.670000000000014
  episode_reward_mean: 9.066400000000018
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 8889
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.1409431812309083
          entropy_coeff: 0.009999999999999998
          kl: 0.011817471667703312
          policy_loss: -0.02840615374346574
          total_loss: 0.5257141021035966
          vf_explained_var: 0.9262957572937012
          vf_loss: 0.5579579664482957
    num_agent_steps_sampled: 915084
    num_agent_steps_trained: 915084
    num_steps_sampled: 915084
    num_steps_trained: 91508

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,458,12445.9,915084,9.0664,14.67,-0.06,97.03


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 917082
  custom_metrics: {}
  date: 2021-11-09_07-13-19
  done: false
  episode_len_mean: 97.81
  episode_media: {}
  episode_reward_max: 14.67000000000001
  episode_reward_mean: 8.796900000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 8909
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3255358057362692
          entropy_coeff: 0.009999999999999998
          kl: 0.011966232898247456
          policy_loss: -0.03807369208052045
          total_loss: 0.1103227440179104
          vf_explained_var: 0.955522894859314
          vf_loss: 0.15398475600495226
    num_agent_steps_sampled: 917082
    num_agent_steps_trained: 917082
    num_steps_sampled: 917082
    num_steps_trained: 917082

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,459,12473,917082,8.7969,14.67,-0.06,97.81




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 919080
  custom_metrics: {}
  date: 2021-11-09_07-13-59
  done: false
  episode_len_mean: 97.01
  episode_media: {}
  episode_reward_max: 14.67000000000001
  episode_reward_mean: 8.123100000000019
  episode_reward_min: -0.15000000000000002
  episodes_this_iter: 20
  episodes_total: 8929
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3447149628684634
          entropy_coeff: 0.009999999999999998
          kl: 0.008826543381165291
          policy_loss: 0.013551090187614872
          total_loss: 0.287034742995387
          vf_explained_var: 0.912917971611023
          vf_loss: 0.28127543768357666
    num_agent_steps_sampled: 919080
    num_agent_steps_trained: 919080
    num_steps_sampled: 919080
    num_steps_trained: 919080
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,460,12513,919080,8.1231,14.67,-0.15,97.01


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 921078
  custom_metrics: {}
  date: 2021-11-09_07-14-25
  done: false
  episode_len_mean: 97.47
  episode_media: {}
  episode_reward_max: 14.67000000000001
  episode_reward_mean: 8.343400000000017
  episode_reward_min: -0.15000000000000002
  episodes_this_iter: 21
  episodes_total: 8950
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3190397063891093
          entropy_coeff: 0.009999999999999998
          kl: 0.012610478178374837
          policy_loss: -0.028489207981952598
          total_loss: 0.14717779255339078
          vf_explained_var: 0.9626034498214722
          vf_loss: 0.18077757902266015
    num_agent_steps_sampled: 921078
    num_agent_steps_trained: 921078
    num_steps_sampled: 921078
    num_steps_trained: 9210

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,461,12539,921078,8.3434,14.67,-0.15,97.47


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 923076
  custom_metrics: {}
  date: 2021-11-09_07-14-52
  done: false
  episode_len_mean: 97.54
  episode_media: {}
  episode_reward_max: 14.700000000000014
  episode_reward_mean: 8.00140000000002
  episode_reward_min: -0.15000000000000002
  episodes_this_iter: 20
  episodes_total: 8970
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2617481793676104
          entropy_coeff: 0.009999999999999998
          kl: 0.00897554847556113
          policy_loss: -0.06021280709121909
          total_loss: 0.006057911038043953
          vf_explained_var: 0.988594651222229
          vf_loss: 0.07313736375155193
    num_agent_steps_sampled: 923076
    num_agent_steps_trained: 923076
    num_steps_sampled: 923076
    num_steps_trained: 923076

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,462,12566.2,923076,8.0014,14.7,-0.15,97.54


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 925074
  custom_metrics: {}
  date: 2021-11-09_07-15-17
  done: false
  episode_len_mean: 99.65
  episode_media: {}
  episode_reward_max: 14.700000000000014
  episode_reward_mean: 8.05450000000002
  episode_reward_min: -0.15000000000000002
  episodes_this_iter: 18
  episodes_total: 8988
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2770709225109644
          entropy_coeff: 0.009999999999999998
          kl: 0.010126714107506984
          policy_loss: -0.017261506847682455
          total_loss: 0.1444048038195996
          vf_explained_var: 0.9744623303413391
          vf_loss: 0.16794860595393749
    num_agent_steps_sampled: 925074
    num_agent_steps_trained: 925074
    num_steps_sampled: 925074
    num_steps_trained: 92507

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,463,12591.4,925074,8.0545,14.7,-0.15,99.65


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 927072
  custom_metrics: {}
  date: 2021-11-09_07-15-41
  done: false
  episode_len_mean: 100.86
  episode_media: {}
  episode_reward_max: 14.700000000000014
  episode_reward_mean: 8.08140000000002
  episode_reward_min: -0.15000000000000002
  episodes_this_iter: 20
  episodes_total: 9008
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.255234682559967
          entropy_coeff: 0.009999999999999998
          kl: 0.01315531231788806
          policy_loss: -0.03766564682480835
          total_loss: 0.12477800510823726
          vf_explained_var: 0.9676408171653748
          vf_loss: 0.16656709135997863
    num_agent_steps_sampled: 927072
    num_agent_steps_trained: 927072
    num_steps_sampled: 927072
    num_steps_trained: 927072

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,464,12615.2,927072,8.0814,14.7,-0.15,100.86


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 929070
  custom_metrics: {}
  date: 2021-11-09_07-16-06
  done: false
  episode_len_mean: 103.97
  episode_media: {}
  episode_reward_max: 14.700000000000014
  episode_reward_mean: 8.46550000000002
  episode_reward_min: 4.1100000000000225
  episodes_this_iter: 19
  episodes_total: 9027
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.295387533732823
          entropy_coeff: 0.009999999999999998
          kl: 0.011666184378022184
          policy_loss: -0.0024640338051886788
          total_loss: 0.10952537093488943
          vf_explained_var: 0.9803905487060547
          vf_loss: 0.11746849126759029
    num_agent_steps_sampled: 929070
    num_agent_steps_trained: 929070
    num_steps_sampled: 929070
    num_steps_trained: 92907

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,465,12639.9,929070,8.4655,14.7,4.11,103.97


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 931068
  custom_metrics: {}
  date: 2021-11-09_07-16-32
  done: false
  episode_len_mean: 102.78
  episode_media: {}
  episode_reward_max: 14.700000000000014
  episode_reward_mean: 8.631200000000018
  episode_reward_min: 3.960000000000021
  episodes_this_iter: 19
  episodes_total: 9046
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.184913606870742
          entropy_coeff: 0.009999999999999998
          kl: 0.012098058136312836
          policy_loss: -0.04493875837042218
          total_loss: 0.1273964810406878
          vf_explained_var: 0.9781928658485413
          vf_loss: 0.17643287533095905
    num_agent_steps_sampled: 931068
    num_agent_steps_trained: 931068
    num_steps_sampled: 931068
    num_steps_trained: 931068
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,466,12666.2,931068,8.6312,14.7,3.96,102.78


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 933066
  custom_metrics: {}
  date: 2021-11-09_07-16-56
  done: false
  episode_len_mean: 104.03
  episode_media: {}
  episode_reward_max: 14.680000000000012
  episode_reward_mean: 8.442500000000019
  episode_reward_min: 3.960000000000021
  episodes_this_iter: 18
  episodes_total: 9064
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.293748902706873
          entropy_coeff: 0.009999999999999998
          kl: 0.010728953984694927
          policy_loss: -0.01751706124771209
          total_loss: 0.16602977097389243
          vf_explained_var: 0.9764853119850159
          vf_loss: 0.18961003940729868
    num_agent_steps_sampled: 933066
    num_agent_steps_trained: 933066
    num_steps_sampled: 933066
    num_steps_trained: 933066


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,467,12690.3,933066,8.4425,14.68,3.96,104.03


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 935064
  custom_metrics: {}
  date: 2021-11-09_07-17-22
  done: false
  episode_len_mean: 104.6
  episode_media: {}
  episode_reward_max: 14.720000000000015
  episode_reward_mean: 8.503900000000021
  episode_reward_min: 3.2900000000000222
  episodes_this_iter: 21
  episodes_total: 9085
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2748760484513781
          entropy_coeff: 0.009999999999999998
          kl: 0.017025997933517157
          policy_loss: -0.044306996358292446
          total_loss: 0.23208422313133875
          vf_explained_var: 0.9490517973899841
          vf_loss: 0.27823103527937615
    num_agent_steps_sampled: 935064
    num_agent_steps_trained: 935064
    num_steps_sampled: 935064
    num_steps_trained: 93506

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,468,12716.2,935064,8.5039,14.72,3.29,104.6


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 937062
  custom_metrics: {}
  date: 2021-11-09_07-17-46
  done: false
  episode_len_mean: 105.99
  episode_media: {}
  episode_reward_max: 14.720000000000015
  episode_reward_mean: 8.496200000000018
  episode_reward_min: 0.7999999999999989
  episodes_this_iter: 18
  episodes_total: 9103
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.306771733647301
          entropy_coeff: 0.009999999999999998
          kl: 0.012928852535175936
          policy_loss: -0.022624168615965615
          total_loss: 0.1215094483856644
          vf_explained_var: 0.9746482968330383
          vf_loss: 0.1489175253858169
    num_agent_steps_sampled: 937062
    num_agent_steps_trained: 937062
    num_steps_sampled: 937062
    num_steps_trained: 937062


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,469,12739.6,937062,8.4962,14.72,0.8,105.99


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 939060
  custom_metrics: {}
  date: 2021-11-09_07-18-12
  done: false
  episode_len_mean: 102.77
  episode_media: {}
  episode_reward_max: 14.720000000000015
  episode_reward_mean: 8.638900000000017
  episode_reward_min: 0.7999999999999989
  episodes_this_iter: 19
  episodes_total: 9122
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.2337208444163912
          entropy_coeff: 0.009999999999999998
          kl: 0.023130605179263342
          policy_loss: -0.013160150178841182
          total_loss: 0.4235162635444708
          vf_explained_var: 0.9579694271087646
          vf_loss: 0.4341933187984285
    num_agent_steps_sampled: 939060
    num_agent_steps_trained: 939060
    num_steps_sampled: 939060
    num_steps_trained: 939060

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,470,12765.4,939060,8.6389,14.72,0.8,102.77


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 941058
  custom_metrics: {}
  date: 2021-11-09_07-18-36
  done: false
  episode_len_mean: 106.06
  episode_media: {}
  episode_reward_max: 14.720000000000015
  episode_reward_mean: 8.41360000000002
  episode_reward_min: 0.7999999999999989
  episodes_this_iter: 20
  episodes_total: 9142
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749998
          cur_lr: 5.000000000000001e-05
          entropy: 1.3123626249177116
          entropy_coeff: 0.009999999999999998
          kl: 0.008421987907587799
          policy_loss: -0.056277229716735225
          total_loss: 0.15107922720767203
          vf_explained_var: 0.9607823491096497
          vf_loss: 0.21238584614225797
    num_agent_steps_sampled: 941058
    num_agent_steps_trained: 941058
    num_steps_sampled: 941058
    num_steps_trained: 94105

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,471,12789.4,941058,8.4136,14.72,0.8,106.06


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 943056
  custom_metrics: {}
  date: 2021-11-09_07-19-01
  done: false
  episode_len_mean: 105.84
  episode_media: {}
  episode_reward_max: 14.720000000000015
  episode_reward_mean: 8.50520000000002
  episode_reward_min: 0.7999999999999989
  episodes_this_iter: 18
  episodes_total: 9160
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2973893256414504
          entropy_coeff: 0.009999999999999998
          kl: 0.007818773524150764
          policy_loss: -0.004457949474453926
          total_loss: 0.13601180472899052
          vf_explained_var: 0.9762645959854126
          vf_loss: 0.14592915120578948
    num_agent_steps_sampled: 943056
    num_agent_steps_trained: 943056
    num_steps_sampled: 943056
    num_steps_trained: 94305

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,472,12814.3,943056,8.5052,14.72,0.8,105.84


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 945054
  custom_metrics: {}
  date: 2021-11-09_07-19-26
  done: false
  episode_len_mean: 105.22
  episode_media: {}
  episode_reward_max: 14.670000000000012
  episode_reward_mean: 8.523000000000017
  episode_reward_min: 0.7999999999999989
  episodes_this_iter: 20
  episodes_total: 9180
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2526766532943363
          entropy_coeff: 0.009999999999999998
          kl: 0.008167537136040471
          policy_loss: -0.018848852210101626
          total_loss: 0.112686083927041
          vf_explained_var: 0.9828803539276123
          vf_loss: 0.13621201266845068
    num_agent_steps_sampled: 945054
    num_agent_steps_trained: 945054
    num_steps_sampled: 945054
    num_steps_trained: 945054

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,473,12839.9,945054,8.523,14.67,0.8,105.22


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 947052
  custom_metrics: {}
  date: 2021-11-09_07-19-51
  done: false
  episode_len_mean: 104.02
  episode_media: {}
  episode_reward_max: 14.670000000000012
  episode_reward_mean: 8.600500000000018
  episode_reward_min: 1.1399999999999992
  episodes_this_iter: 19
  episodes_total: 9199
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749998
          cur_lr: 5.000000000000001e-05
          entropy: 1.28315665324529
          entropy_coeff: 0.009999999999999998
          kl: 0.01085669233394809
          policy_loss: -0.03771705508586906
          total_loss: 0.1273980721653927
          vf_explained_var: 0.9730768203735352
          vf_loss: 0.16751250245031857
    num_agent_steps_sampled: 947052
    num_agent_steps_trained: 947052
    num_steps_sampled: 947052
    num_steps_trained: 947052
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,474,12864.3,947052,8.6005,14.67,1.14,104.02


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 949050
  custom_metrics: {}
  date: 2021-11-09_07-20-16
  done: false
  episode_len_mean: 103.57
  episode_media: {}
  episode_reward_max: 14.670000000000012
  episode_reward_mean: 8.69720000000002
  episode_reward_min: 1.1000000000000034
  episodes_this_iter: 21
  episodes_total: 9220
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2150553399608248
          entropy_coeff: 0.009999999999999998
          kl: 0.008441232825798638
          policy_loss: -0.006060245437991051
          total_loss: 0.12324929353559301
          vf_explained_var: 0.9729400873184204
          vf_loss: 0.1333473578716318
    num_agent_steps_sampled: 949050
    num_agent_steps_trained: 949050
    num_steps_sampled: 949050
    num_steps_trained: 949050

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,475,12890.1,949050,8.6972,14.67,1.1,103.57


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 951048
  custom_metrics: {}
  date: 2021-11-09_07-20-40
  done: false
  episode_len_mean: 103.35
  episode_media: {}
  episode_reward_max: 14.670000000000012
  episode_reward_mean: 8.66860000000002
  episode_reward_min: 1.1000000000000034
  episodes_this_iter: 18
  episodes_total: 9238
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749998
          cur_lr: 5.000000000000001e-05
          entropy: 1.3041623478844053
          entropy_coeff: 0.009999999999999998
          kl: 0.009737177060083407
          policy_loss: -0.014926485983388764
          total_loss: 0.16769730950750056
          vf_explained_var: 0.9602910280227661
          vf_loss: 0.18630717375448771
    num_agent_steps_sampled: 951048
    num_agent_steps_trained: 951048
    num_steps_sampled: 951048
    num_steps_trained: 95104

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,476,12914,951048,8.6686,14.67,1.1,103.35




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 953046
  custom_metrics: {}
  date: 2021-11-09_07-21-19
  done: false
  episode_len_mean: 103.57
  episode_media: {}
  episode_reward_max: 14.690000000000014
  episode_reward_mean: 8.518500000000019
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 9257
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749998
          cur_lr: 5.000000000000001e-05
          entropy: 1.3094225997016544
          entropy_coeff: 0.009999999999999998
          kl: 0.007777644986223633
          policy_loss: -0.016681447465504918
          total_loss: 0.1634947405241075
          vf_explained_var: 0.9456063508987427
          vf_loss: 0.18579544357600666
    num_agent_steps_sampled: 953046
    num_agent_steps_trained: 953046
    num_steps_sampled: 953046
    num_steps_trained: 95

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,477,12952.3,953046,8.5185,14.69,-0.06,103.57




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 955044
  custom_metrics: {}
  date: 2021-11-09_07-22-01
  done: false
  episode_len_mean: 102.65
  episode_media: {}
  episode_reward_max: 14.690000000000014
  episode_reward_mean: 8.547400000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 9278
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749998
          cur_lr: 5.000000000000001e-05
          entropy: 1.287097676027389
          entropy_coeff: 0.009999999999999998
          kl: 0.0077633082392784365
          policy_loss: -0.06014566260079543
          total_loss: 0.09867889612380948
          vf_explained_var: 0.965850830078125
          vf_loss: 0.16423434496280692
    num_agent_steps_sampled: 955044
    num_agent_steps_trained: 955044
    num_steps_sampled: 955044
    num_steps_trained: 955

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,478,12994.5,955044,8.5474,14.69,-0.06,102.65




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 957042
  custom_metrics: {}
  date: 2021-11-09_07-22-41
  done: false
  episode_len_mean: 101.44
  episode_media: {}
  episode_reward_max: 14.690000000000014
  episode_reward_mean: 8.327500000000018
  episode_reward_min: -0.07
  episodes_this_iter: 19
  episodes_total: 9297
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749998
          cur_lr: 5.000000000000001e-05
          entropy: 1.3046112406821477
          entropy_coeff: 0.009999999999999998
          kl: 0.015099065091687461
          policy_loss: -0.02815070826382864
          total_loss: 0.2683397469066438
          vf_explained_var: 0.9315003752708435
          vf_loss: 0.2950250997961987
    num_agent_steps_sampled: 957042
    num_agent_steps_trained: 957042
    num_steps_sampled: 957042
    num_steps_trained: 957042
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,479,13034.1,957042,8.3275,14.69,-0.07,101.44


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 959040
  custom_metrics: {}
  date: 2021-11-09_07-23-06
  done: false
  episode_len_mean: 101.83
  episode_media: {}
  episode_reward_max: 14.690000000000014
  episode_reward_mean: 8.43680000000002
  episode_reward_min: -0.07
  episodes_this_iter: 20
  episodes_total: 9317
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749998
          cur_lr: 5.000000000000001e-05
          entropy: 1.33638847385134
          entropy_coeff: 0.009999999999999998
          kl: 0.008844986538867838
          policy_loss: 0.004775378178982508
          total_loss: 0.21064449625001067
          vf_explained_var: 0.9668878316879272
          vf_loss: 0.21073222905397415
    num_agent_steps_sampled: 959040
    num_agent_steps_trained: 959040
    num_steps_sampled: 959040
    num_steps_trained: 959040
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,480,13059.7,959040,8.4368,14.69,-0.07,101.83


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 961038
  custom_metrics: {}
  date: 2021-11-09_07-23-30
  done: false
  episode_len_mean: 102.22
  episode_media: {}
  episode_reward_max: 14.690000000000015
  episode_reward_mean: 8.370900000000018
  episode_reward_min: -0.07
  episodes_this_iter: 19
  episodes_total: 9336
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749998
          cur_lr: 5.000000000000001e-05
          entropy: 1.370582634494418
          entropy_coeff: 0.009999999999999998
          kl: 0.008615217275097566
          policy_loss: -0.03377928815427281
          total_loss: 0.12762245812586376
          vf_explained_var: 0.9634919762611389
          vf_loss: 0.1668276256039029
    num_agent_steps_sampled: 961038
    num_agent_steps_trained: 961038
    num_steps_sampled: 961038
    num_steps_trained: 961038
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,481,13083.4,961038,8.3709,14.69,-0.07,102.22


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 963036
  custom_metrics: {}
  date: 2021-11-09_07-23-55
  done: false
  episode_len_mean: 100.8
  episode_media: {}
  episode_reward_max: 14.690000000000015
  episode_reward_mean: 8.510100000000017
  episode_reward_min: -0.07
  episodes_this_iter: 19
  episodes_total: 9355
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749998
          cur_lr: 5.000000000000001e-05
          entropy: 1.3483777273268926
          entropy_coeff: 0.009999999999999998
          kl: 0.010576584070230093
          policy_loss: -0.025897859036922453
          total_loss: 0.1397572303545617
          vf_explained_var: 0.9713127017021179
          vf_loss: 0.16897388224800428
    num_agent_steps_sampled: 963036
    num_agent_steps_trained: 963036
    num_steps_sampled: 963036
    num_steps_trained: 963036
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,482,13108.8,963036,8.5101,14.69,-0.07,100.8


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 965034
  custom_metrics: {}
  date: 2021-11-09_07-24-19
  done: false
  episode_len_mean: 102.91
  episode_media: {}
  episode_reward_max: 14.690000000000015
  episode_reward_mean: 8.258200000000018
  episode_reward_min: -0.07
  episodes_this_iter: 18
  episodes_total: 9373
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749998
          cur_lr: 5.000000000000001e-05
          entropy: 1.3783443246568952
          entropy_coeff: 0.009999999999999998
          kl: 0.012819865984949327
          policy_loss: -0.02133625479681151
          total_loss: 0.1971303440186949
          vf_explained_var: 0.9488179683685303
          vf_loss: 0.219929071977025
    num_agent_steps_sampled: 965034
    num_agent_steps_trained: 965034
    num_steps_sampled: 965034
    num_steps_trained: 965034
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,483,13132.8,965034,8.2582,14.69,-0.07,102.91


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 967032
  custom_metrics: {}
  date: 2021-11-09_07-24-44
  done: false
  episode_len_mean: 105.41
  episode_media: {}
  episode_reward_max: 14.690000000000015
  episode_reward_mean: 8.217800000000018
  episode_reward_min: 3.8300000000000214
  episodes_this_iter: 19
  episodes_total: 9392
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749998
          cur_lr: 5.000000000000001e-05
          entropy: 1.2812024479820614
          entropy_coeff: 0.009999999999999998
          kl: 0.013520465235456735
          policy_loss: -0.00968151262828282
          total_loss: 0.20174849164627848
          vf_explained_var: 0.958791971206665
          vf_loss: 0.21124772567834174
    num_agent_steps_sampled: 967032
    num_agent_steps_trained: 967032
    num_steps_sampled: 967032
    num_steps_trained: 967032

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,484,13157.3,967032,8.2178,14.69,3.83,105.41


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 969030
  custom_metrics: {}
  date: 2021-11-09_07-25-09
  done: false
  episode_len_mean: 106.92
  episode_media: {}
  episode_reward_max: 14.690000000000015
  episode_reward_mean: 7.869300000000019
  episode_reward_min: 2.490000000000017
  episodes_this_iter: 18
  episodes_total: 9410
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9610839843749998
          cur_lr: 5.000000000000001e-05
          entropy: 1.4192433005287535
          entropy_coeff: 0.009999999999999998
          kl: 0.023797281646626835
          policy_loss: 0.0056829453756411874
          total_loss: 0.32442486259554115
          vf_explained_var: 0.9472231864929199
          vf_loss: 0.31006315864977385
    num_agent_steps_sampled: 969030
    num_agent_steps_trained: 969030
    num_steps_sampled: 969030
    num_steps_trained: 96903

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,485,13181.8,969030,7.8693,14.69,2.49,106.92


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 971028
  custom_metrics: {}
  date: 2021-11-09_07-25-34
  done: false
  episode_len_mean: 105.81
  episode_media: {}
  episode_reward_max: 14.640000000000015
  episode_reward_mean: 8.07710000000002
  episode_reward_min: 2.490000000000017
  episodes_this_iter: 20
  episodes_total: 9430
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4416259765625
          cur_lr: 5.000000000000001e-05
          entropy: 1.2202425982270921
          entropy_coeff: 0.009999999999999998
          kl: 0.0069970615434194255
          policy_loss: -0.0776516702558313
          total_loss: 0.11292846252520879
          vf_explained_var: 0.9691931009292603
          vf_loss: 0.19269541454102312
    num_agent_steps_sampled: 971028
    num_agent_steps_trained: 971028
    num_steps_sampled: 971028
    num_steps_trained: 971028
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,486,13207.7,971028,8.0771,14.64,2.49,105.81


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 973026
  custom_metrics: {}
  date: 2021-11-09_07-25-59
  done: false
  episode_len_mean: 105.87
  episode_media: {}
  episode_reward_max: 14.640000000000015
  episode_reward_mean: 7.93960000000002
  episode_reward_min: 2.490000000000017
  episodes_this_iter: 20
  episodes_total: 9450
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4416259765625
          cur_lr: 5.000000000000001e-05
          entropy: 1.316893546354203
          entropy_coeff: 0.009999999999999998
          kl: 0.006095951624995006
          policy_loss: -0.05780257515254475
          total_loss: 0.055868073450844914
          vf_explained_var: 0.9712629318237305
          vf_loss: 0.11805150445018496
    num_agent_steps_sampled: 973026
    num_agent_steps_trained: 973026
    num_steps_sampled: 973026
    num_steps_trained: 973026
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,487,13232.3,973026,7.9396,14.64,2.49,105.87


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 975024
  custom_metrics: {}
  date: 2021-11-09_07-26-24
  done: false
  episode_len_mean: 105.63
  episode_media: {}
  episode_reward_max: 14.640000000000015
  episode_reward_mean: 8.223600000000017
  episode_reward_min: 2.490000000000017
  episodes_this_iter: 18
  episodes_total: 9468
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4416259765625
          cur_lr: 5.000000000000001e-05
          entropy: 1.2455804075513568
          entropy_coeff: 0.009999999999999998
          kl: 0.004947152240946221
          policy_loss: -0.1185408479755833
          total_loss: 0.02198632698328722
          vf_explained_var: 0.9807801842689514
          vf_loss: 0.14585103813026631
    num_agent_steps_sampled: 975024
    num_agent_steps_trained: 975024
    num_steps_sampled: 975024
    num_steps_trained: 975024
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,488,13257.7,975024,8.2236,14.64,2.49,105.63


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 977022
  custom_metrics: {}
  date: 2021-11-09_07-26-50
  done: false
  episode_len_mean: 104.23
  episode_media: {}
  episode_reward_max: 14.670000000000014
  episode_reward_mean: 8.278500000000019
  episode_reward_min: 2.490000000000017
  episodes_this_iter: 20
  episodes_total: 9488
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3649146148136684
          entropy_coeff: 0.009999999999999998
          kl: 0.009937563007848805
          policy_loss: -0.09258249751513913
          total_loss: 0.03132838852526176
          vf_explained_var: 0.9679703712463379
          vf_loss: 0.1303969077411152
    num_agent_steps_sampled: 977022
    num_agent_steps_trained: 977022
    num_steps_sampled: 977022
    num_steps_trained: 977022
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,489,13282.7,977022,8.2785,14.67,2.49,104.23


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 979020
  custom_metrics: {}
  date: 2021-11-09_07-27-14
  done: false
  episode_len_mean: 102.78
  episode_media: {}
  episode_reward_max: 14.670000000000014
  episode_reward_mean: 8.445200000000018
  episode_reward_min: 2.820000000000016
  episodes_this_iter: 20
  episodes_total: 9508
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2767489217576526
          entropy_coeff: 0.009999999999999998
          kl: 0.013613878900360695
          policy_loss: -0.052184934727847576
          total_loss: 0.25899076739414817
          vf_explained_var: 0.9430850148200989
          vf_loss: 0.3141301335323425
    num_agent_steps_sampled: 979020
    num_agent_steps_trained: 979020
    num_steps_sampled: 979020
    num_steps_trained: 979020
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,490,13307.3,979020,8.4452,14.67,2.82,102.78


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 981018
  custom_metrics: {}
  date: 2021-11-09_07-27-41
  done: false
  episode_len_mean: 101.99
  episode_media: {}
  episode_reward_max: 14.670000000000014
  episode_reward_mean: 8.298600000000016
  episode_reward_min: 2.820000000000016
  episodes_this_iter: 20
  episodes_total: 9528
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2122203026499068
          entropy_coeff: 0.009999999999999998
          kl: 0.009737003914263147
          policy_loss: -0.005565816554285231
          total_loss: 0.17958246196309727
          vf_explained_var: 0.9662787914276123
          vf_loss: 0.1902519224300271
    num_agent_steps_sampled: 981018
    num_agent_steps_trained: 981018
    num_steps_sampled: 981018
    num_steps_trained: 981018
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,491,13333.7,981018,8.2986,14.67,2.82,101.99


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 983016
  custom_metrics: {}
  date: 2021-11-09_07-28-06
  done: false
  episode_len_mean: 101.75
  episode_media: {}
  episode_reward_max: 14.680000000000014
  episode_reward_mean: 8.44430000000002
  episode_reward_min: 2.820000000000016
  episodes_this_iter: 20
  episodes_total: 9548
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.173776033946446
          entropy_coeff: 0.009999999999999998
          kl: 0.011706913208260821
          policy_loss: -0.005429130420088768
          total_loss: 0.18326009724821363
          vf_explained_var: 0.9683172106742859
          vf_loss: 0.19198849272160304
    num_agent_steps_sampled: 983016
    num_agent_steps_trained: 983016
    num_steps_sampled: 983016
    num_steps_trained: 983016
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,492,13359.5,983016,8.4443,14.68,2.82,101.75


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 985014
  custom_metrics: {}
  date: 2021-11-09_07-28-33
  done: false
  episode_len_mean: 100.85
  episode_media: {}
  episode_reward_max: 14.680000000000014
  episode_reward_mean: 8.393600000000019
  episode_reward_min: 2.820000000000016
  episodes_this_iter: 20
  episodes_total: 9568
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2528010391053699
          entropy_coeff: 0.009999999999999998
          kl: 0.00923721134907511
          policy_loss: -0.042329334600695545
          total_loss: 0.06646240665799096
          vf_explained_var: 0.9808676838874817
          vf_loss: 0.11466144811184634
    num_agent_steps_sampled: 985014
    num_agent_steps_trained: 985014
    num_steps_sampled: 985014
    num_steps_trained: 985014
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,493,13386.2,985014,8.3936,14.68,2.82,100.85


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 987012
  custom_metrics: {}
  date: 2021-11-09_07-28-59
  done: false
  episode_len_mean: 100.1
  episode_media: {}
  episode_reward_max: 14.680000000000014
  episode_reward_mean: 8.765500000000019
  episode_reward_min: 3.780000000000019
  episodes_this_iter: 20
  episodes_total: 9588
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.1441243699618748
          entropy_coeff: 0.009999999999999998
          kl: 0.010769665473099535
          policy_loss: -0.04600232651545888
          total_loss: 0.1697909929168721
          vf_explained_var: 0.9758222103118896
          vf_loss: 0.21947164535522462
    num_agent_steps_sampled: 987012
    num_agent_steps_trained: 987012
    num_steps_sampled: 987012
    num_steps_trained: 987012
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,494,13411.9,987012,8.7655,14.68,3.78,100.1




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 989010
  custom_metrics: {}
  date: 2021-11-09_07-29-37
  done: false
  episode_len_mean: 99.97
  episode_media: {}
  episode_reward_max: 14.680000000000014
  episode_reward_mean: 8.714100000000018
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 9608
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.293176571528117
          entropy_coeff: 0.009999999999999998
          kl: 0.011951805224730222
          policy_loss: -0.0030020952978659244
          total_loss: 0.37717119983973957
          vf_explained_var: 0.9318184852600098
          vf_loss: 0.3844900409380595
    num_agent_steps_sampled: 989010
    num_agent_steps_trained: 989010
    num_steps_sampled: 989010
    num_steps_trained: 98901

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,495,13450,989010,8.7141,14.68,-0.06,99.97


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 991008
  custom_metrics: {}
  date: 2021-11-09_07-30-03
  done: false
  episode_len_mean: 101.83
  episode_media: {}
  episode_reward_max: 14.680000000000014
  episode_reward_mean: 8.557800000000016
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 9627
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3879045554569789
          entropy_coeff: 0.009999999999999998
          kl: 0.008707270143643088
          policy_loss: -0.0369136057084515
          total_loss: 0.06989564523766083
          vf_explained_var: 0.9675561189651489
          vf_loss: 0.11441198432197174
    num_agent_steps_sampled: 991008
    num_agent_steps_trained: 991008
    num_steps_sampled: 991008
    num_steps_trained: 99100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,496,13476.1,991008,8.5578,14.68,-0.06,101.83




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 993006
  custom_metrics: {}
  date: 2021-11-09_07-30-56
  done: false
  episode_len_mean: 99.47
  episode_media: {}
  episode_reward_max: 14.680000000000014
  episode_reward_mean: 8.703700000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 9648
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2216593884286426
          entropy_coeff: 0.009999999999999998
          kl: 0.00790468234472834
          policy_loss: -0.07293496633924189
          total_loss: 0.10782630716760953
          vf_explained_var: 0.9767476916313171
          vf_loss: 0.18728006822722298
    num_agent_steps_sampled: 993006
    num_agent_steps_trained: 993006
    num_steps_sampled: 993006
    num_steps_trained: 993006

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,497,13528.4,993006,8.7037,14.68,-0.06,99.47


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 995004
  custom_metrics: {}
  date: 2021-11-09_07-31-22
  done: false
  episode_len_mean: 98.59
  episode_media: {}
  episode_reward_max: 14.680000000000014
  episode_reward_mean: 9.050200000000016
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 9669
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.1634296638625008
          entropy_coeff: 0.009999999999999998
          kl: 0.008926080411926171
          policy_loss: -0.027532315502564114
          total_loss: 0.08457667845123935
          vf_explained_var: 0.9868634939193726
          vf_loss: 0.11730925641244366
    num_agent_steps_sampled: 995004
    num_agent_steps_trained: 995004
    num_steps_sampled: 995004
    num_steps_trained: 9950

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,498,13554.5,995004,9.0502,14.68,-0.06,98.59


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 997002
  custom_metrics: {}
  date: 2021-11-09_07-31-47
  done: false
  episode_len_mean: 98.55
  episode_media: {}
  episode_reward_max: 14.680000000000014
  episode_reward_mean: 9.052100000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 9690
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.286922509897323
          entropy_coeff: 0.009999999999999998
          kl: 0.015513240129523683
          policy_loss: -0.024657367843957174
          total_loss: 0.1840913502987297
          vf_explained_var: 0.9751114845275879
          vf_loss: 0.21043580014790808
    num_agent_steps_sampled: 997002
    num_agent_steps_trained: 997002
    num_steps_sampled: 997002
    num_steps_trained: 997002

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,499,13580,997002,9.0521,14.68,-0.06,98.55


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 999000
  custom_metrics: {}
  date: 2021-11-09_07-32-12
  done: false
  episode_len_mean: 97.67
  episode_media: {}
  episode_reward_max: 14.680000000000014
  episode_reward_mean: 9.507700000000018
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 9710
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2671563157013483
          entropy_coeff: 0.009999999999999998
          kl: 0.012695640234861066
          policy_loss: -0.04056090534265552
          total_loss: 0.0822861924057915
          vf_explained_var: 0.9800369143486023
          vf_loss: 0.12636747842743284
    num_agent_steps_sampled: 999000
    num_agent_steps_trained: 999000
    num_steps_sampled: 999000
    num_steps_trained: 999000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,500,13604.3,999000,9.5077,14.68,-0.06,97.67


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1000998
  custom_metrics: {}
  date: 2021-11-09_07-32-36
  done: false
  episode_len_mean: 97.54
  episode_media: {}
  episode_reward_max: 14.680000000000014
  episode_reward_mean: 9.378300000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 9729
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3441310309228442
          entropy_coeff: 0.009999999999999998
          kl: 0.011177307928497977
          policy_loss: -0.02673974163564188
          total_loss: 0.11399736968534334
          vf_explained_var: 0.9755242466926575
          vf_loss: 0.1461216751486063
    num_agent_steps_sampled: 1000998
    num_agent_steps_trained: 1000998
    num_steps_sampled: 1000998
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,501,13629.2,1000998,9.3783,14.68,-0.06,97.54


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1002996
  custom_metrics: {}
  date: 2021-11-09_07-33-01
  done: false
  episode_len_mean: 100.19
  episode_media: {}
  episode_reward_max: 14.680000000000014
  episode_reward_mean: 9.343900000000017
  episode_reward_min: 2.860000000000026
  episodes_this_iter: 19
  episodes_total: 9748
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3184738885788692
          entropy_coeff: 0.009999999999999998
          kl: 0.009309609400842786
          policy_loss: -0.09460366501339844
          total_loss: -0.0003235831501938048
          vf_explained_var: 0.9868431687355042
          vf_loss: 0.10075433364226705
    num_agent_steps_sampled: 1002996
    num_agent_steps_trained: 1002996
    num_steps_sampled: 1002996
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,502,13653.8,1002996,9.3439,14.68,2.86,100.19


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1004994
  custom_metrics: {}
  date: 2021-11-09_07-33-25
  done: false
  episode_len_mean: 101.74
  episode_media: {}
  episode_reward_max: 14.660000000000014
  episode_reward_mean: 9.146500000000017
  episode_reward_min: 2.860000000000026
  episodes_this_iter: 19
  episodes_total: 9767
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2410378160930815
          entropy_coeff: 0.009999999999999998
          kl: 0.012058443348733423
          policy_loss: -0.01637920660986787
          total_loss: 0.1004986823492107
          vf_explained_var: 0.9865846037864685
          vf_loss: 0.12059638388454914
    num_agent_steps_sampled: 1004994
    num_agent_steps_trained: 1004994
    num_steps_sampled: 1004994
    num_steps_trained: 10049

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,503,13678,1004994,9.1465,14.66,2.86,101.74


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1006992
  custom_metrics: {}
  date: 2021-11-09_07-33-50
  done: false
  episode_len_mean: 102.33
  episode_media: {}
  episode_reward_max: 14.660000000000014
  episode_reward_mean: 9.056900000000018
  episode_reward_min: 2.860000000000026
  episodes_this_iter: 20
  episodes_total: 9787
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2499226910727366
          entropy_coeff: 0.009999999999999998
          kl: 0.014336366053020098
          policy_loss: -0.006340887255611874
          total_loss: 0.12418926883754985
          vf_explained_var: 0.979466438293457
          vf_loss: 0.1326955452206589
    num_agent_steps_sampled: 1006992
    num_agent_steps_trained: 1006992
    num_steps_sampled: 1006992
    num_steps_trained: 10069

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,504,13702.4,1006992,9.0569,14.66,2.86,102.33


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1008990
  custom_metrics: {}
  date: 2021-11-09_07-34-15
  done: false
  episode_len_mean: 103.42
  episode_media: {}
  episode_reward_max: 14.660000000000014
  episode_reward_mean: 8.962600000000018
  episode_reward_min: 2.860000000000026
  episodes_this_iter: 19
  episodes_total: 9806
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.296113355386825
          entropy_coeff: 0.009999999999999998
          kl: 0.008999051306867269
          policy_loss: -0.05934094116091728
          total_loss: 0.02492044443885485
          vf_explained_var: 0.9776224493980408
          vf_loss: 0.0907358868047595
    num_agent_steps_sampled: 1008990
    num_agent_steps_trained: 1008990
    num_steps_sampled: 1008990
    num_steps_trained: 100899

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,505,13727.5,1008990,8.9626,14.66,2.86,103.42


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1010988
  custom_metrics: {}
  date: 2021-11-09_07-34-43
  done: false
  episode_len_mean: 104.32
  episode_media: {}
  episode_reward_max: 14.620000000000015
  episode_reward_mean: 8.68320000000002
  episode_reward_min: 2.860000000000026
  episodes_this_iter: 19
  episodes_total: 9825
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.361094195502145
          entropy_coeff: 0.009999999999999998
          kl: 0.010632199200278733
          policy_loss: -0.04237221163653192
          total_loss: 0.05779501452953333
          vf_explained_var: 0.976884126663208
          vf_loss: 0.10611434177983375
    num_agent_steps_sampled: 1010988
    num_agent_steps_trained: 1010988
    num_steps_sampled: 1010988
    num_steps_trained: 1010988

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,506,13755.2,1010988,8.6832,14.62,2.86,104.32


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1012986
  custom_metrics: {}
  date: 2021-11-09_07-35-08
  done: false
  episode_len_mean: 102.95
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 9.29900000000002
  episode_reward_min: 3.4200000000000257
  episodes_this_iter: 20
  episodes_total: 9845
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.1649881958961488
          entropy_coeff: 0.009999999999999998
          kl: 0.010360687013952721
          policy_loss: -0.027379632155810085
          total_loss: 0.15150520602862041
          vf_explained_var: 0.9841035604476929
          vf_loss: 0.18306660506696928
    num_agent_steps_sampled: 1012986
    num_agent_steps_trained: 1012986
    num_steps_sampled: 1012986
    num_steps_trained: 101

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,507,13780.3,1012986,9.299,14.71,3.42,102.95


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1014984
  custom_metrics: {}
  date: 2021-11-09_07-35-33
  done: false
  episode_len_mean: 102.48
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 8.77280000000002
  episode_reward_min: 3.4200000000000257
  episodes_this_iter: 20
  episodes_total: 9865
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3245310130573453
          entropy_coeff: 0.009999999999999998
          kl: 0.018732491706006067
          policy_loss: -0.00622452763574464
          total_loss: 0.34984508196246766
          vf_explained_var: 0.9342565536499023
          vf_loss: 0.3558122939651921
    num_agent_steps_sampled: 1014984
    num_agent_steps_trained: 1014984
    num_steps_sampled: 1014984
    num_steps_trained: 10149

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,508,13805.9,1014984,8.7728,14.71,3.42,102.48


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1016982
  custom_metrics: {}
  date: 2021-11-09_07-35-59
  done: false
  episode_len_mean: 101.35
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 8.811000000000018
  episode_reward_min: 0.9899999999999991
  episodes_this_iter: 20
  episodes_total: 9885
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.213129856189092
          entropy_coeff: 0.009999999999999998
          kl: 0.008707676167329937
          policy_loss: -0.0023268285163101695
          total_loss: 0.16975901635097607
          vf_explained_var: 0.9635918140411377
          vf_loss: 0.1779405371596416
    num_agent_steps_sampled: 1016982
    num_agent_steps_trained: 1016982
    num_steps_sampled: 1016982
    num_steps_trained: 101

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,509,13831.7,1016982,8.811,14.71,0.99,101.35


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1018980
  custom_metrics: {}
  date: 2021-11-09_07-36-25
  done: false
  episode_len_mean: 101.13
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 8.769800000000018
  episode_reward_min: 0.9899999999999991
  episodes_this_iter: 21
  episodes_total: 9906
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2151677509148915
          entropy_coeff: 0.009999999999999998
          kl: 0.015753049922310275
          policy_loss: -0.049665897312973224
          total_loss: 0.2219564316705579
          vf_explained_var: 0.951881468296051
          vf_loss: 0.2724190057743163
    num_agent_steps_sampled: 1018980
    num_agent_steps_trained: 1018980
    num_steps_sampled: 1018980
    num_steps_trained: 10189

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,510,13857.6,1018980,8.7698,14.71,0.99,101.13


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1020978
  custom_metrics: {}
  date: 2021-11-09_07-36-51
  done: false
  episode_len_mean: 99.95
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 9.016600000000016
  episode_reward_min: 0.9899999999999991
  episodes_this_iter: 19
  episodes_total: 9925
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2401760793867564
          entropy_coeff: 0.009999999999999998
          kl: 0.009758494539489435
          policy_loss: -0.016422584528724353
          total_loss: 0.11122261800226711
          vf_explained_var: 0.976497232913971
          vf_loss: 0.13301291424958478
    num_agent_steps_sampled: 1020978
    num_agent_steps_trained: 1020978
    num_steps_sampled: 1020978
    num_steps_trained: 1020

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,511,13883.2,1020978,9.0166,14.71,0.99,99.95


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1022976
  custom_metrics: {}
  date: 2021-11-09_07-37-16
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 14.670000000000012
  episode_reward_mean: 8.555800000000017
  episode_reward_min: 0.9899999999999991
  episodes_this_iter: 20
  episodes_total: 9945
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2567764824344998
          entropy_coeff: 0.009999999999999998
          kl: 0.010591503337322363
          policy_loss: -0.04268458311756452
          total_loss: 0.09382198815721841
          vf_explained_var: 0.979353129863739
          vf_loss: 0.14143984276978744
    num_agent_steps_sampled: 1022976
    num_agent_steps_trained: 1022976
    num_steps_sampled: 1022976
    num_steps_trained: 10229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,512,13908.5,1022976,8.5558,14.67,0.99,100




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1024974
  custom_metrics: {}
  date: 2021-11-09_07-37-58
  done: false
  episode_len_mean: 98.58
  episode_media: {}
  episode_reward_max: 14.670000000000012
  episode_reward_mean: 8.814200000000017
  episode_reward_min: -0.05
  episodes_this_iter: 21
  episodes_total: 9966
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2410150919641767
          entropy_coeff: 0.009999999999999998
          kl: 0.011226401426314815
          policy_loss: -0.023201224247791936
          total_loss: 0.21124985939157861
          vf_explained_var: 0.9746416807174683
          vf_loss: 0.2387690977503856
    num_agent_steps_sampled: 1024974
    num_agent_steps_trained: 1024974
    num_steps_sampled: 1024974
    num_steps_trained: 1024974
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,513,13950.5,1024974,8.8142,14.67,-0.05,98.58


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1026972
  custom_metrics: {}
  date: 2021-11-09_07-38-23
  done: false
  episode_len_mean: 99.43
  episode_media: {}
  episode_reward_max: 14.670000000000012
  episode_reward_mean: 8.697900000000018
  episode_reward_min: -0.05
  episodes_this_iter: 20
  episodes_total: 9986
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2851168694950286
          entropy_coeff: 0.009999999999999998
          kl: 0.010881784272154426
          policy_loss: -0.016706717227186474
          total_loss: 0.13344550459157853
          vf_explained_var: 0.9678093194961548
          vf_loss: 0.15515965961274647
    num_agent_steps_sampled: 1026972
    num_agent_steps_trained: 1026972
    num_steps_sampled: 1026972
    num_steps_trained: 1026972
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,514,13975.1,1026972,8.6979,14.67,-0.05,99.43




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1028970
  custom_metrics: {}
  date: 2021-11-09_07-39-05
  done: false
  episode_len_mean: 98.36
  episode_media: {}
  episode_reward_max: 14.670000000000012
  episode_reward_mean: 8.530300000000018
  episode_reward_min: -0.05
  episodes_this_iter: 21
  episodes_total: 10007
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2281716028849283
          entropy_coeff: 0.009999999999999998
          kl: 0.010286710268723555
          policy_loss: -0.03553675835331281
          total_loss: 0.3015340448490211
          vf_explained_var: 0.9617236852645874
          vf_loss: 0.34193772253181254
    num_agent_steps_sampled: 1028970
    num_agent_steps_trained: 1028970
    num_steps_sampled: 1028970
    num_steps_trained: 1028970
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,515,14017.4,1028970,8.5303,14.67,-0.05,98.36


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1030968
  custom_metrics: {}
  date: 2021-11-09_07-39-30
  done: false
  episode_len_mean: 99.07
  episode_media: {}
  episode_reward_max: 14.670000000000012
  episode_reward_mean: 8.432600000000019
  episode_reward_min: -0.05
  episodes_this_iter: 18
  episodes_total: 10025
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2643639275005885
          entropy_coeff: 0.009999999999999998
          kl: 0.015935218353488063
          policy_loss: -0.01025861817456427
          total_loss: 0.1696725909553823
          vf_explained_var: 0.9718122482299805
          vf_loss: 0.1810885358424414
    num_agent_steps_sampled: 1030968
    num_agent_steps_trained: 1030968
    num_steps_sampled: 1030968
    num_steps_trained: 1030968
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,516,14042.2,1030968,8.4326,14.67,-0.05,99.07


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1032966
  custom_metrics: {}
  date: 2021-11-09_07-39-54
  done: false
  episode_len_mean: 99.88
  episode_media: {}
  episode_reward_max: 14.640000000000017
  episode_reward_mean: 8.531900000000018
  episode_reward_min: -0.05
  episodes_this_iter: 20
  episodes_total: 10045
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2776159536270868
          entropy_coeff: 0.009999999999999998
          kl: 0.009513246783243511
          policy_loss: -0.02314571963534469
          total_loss: 0.07234317621304875
          vf_explained_var: 0.9863993525505066
          vf_loss: 0.10140778280439831
    num_agent_steps_sampled: 1032966
    num_agent_steps_trained: 1032966
    num_steps_sampled: 1032966
    num_steps_trained: 1032966
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,517,14066.5,1032966,8.5319,14.64,-0.05,99.88


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1034964
  custom_metrics: {}
  date: 2021-11-09_07-40-20
  done: false
  episode_len_mean: 100.64
  episode_media: {}
  episode_reward_max: 14.660000000000014
  episode_reward_mean: 8.979900000000018
  episode_reward_min: -0.05
  episodes_this_iter: 20
  episodes_total: 10065
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2633673633847917
          entropy_coeff: 0.009999999999999998
          kl: 0.014655400485711685
          policy_loss: -0.019934272127492086
          total_loss: 0.15943897792271206
          vf_explained_var: 0.9821833372116089
          vf_loss: 0.18144312155033862
    num_agent_steps_sampled: 1034964
    num_agent_steps_trained: 1034964
    num_steps_sampled: 1034964
    num_steps_trained: 1034964
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,518,14092.3,1034964,8.9799,14.66,-0.05,100.64


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1036962
  custom_metrics: {}
  date: 2021-11-09_07-40-44
  done: false
  episode_len_mean: 101.72
  episode_media: {}
  episode_reward_max: 14.660000000000014
  episode_reward_mean: 9.143700000000019
  episode_reward_min: -0.05
  episodes_this_iter: 19
  episodes_total: 10084
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2708647693906512
          entropy_coeff: 0.009999999999999998
          kl: 0.011415090488834935
          policy_loss: -0.0031029009348934604
          total_loss: 0.2768696361088327
          vf_explained_var: 0.9669061303138733
          vf_loss: 0.28445304009531225
    num_agent_steps_sampled: 1036962
    num_agent_steps_trained: 1036962
    num_steps_sampled: 1036962
    num_steps_trained: 1036962
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,519,14116.4,1036962,9.1437,14.66,-0.05,101.72


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1038960
  custom_metrics: {}
  date: 2021-11-09_07-41-08
  done: false
  episode_len_mean: 103.93
  episode_media: {}
  episode_reward_max: 14.660000000000014
  episode_reward_mean: 9.168200000000018
  episode_reward_min: 3.820000000000022
  episodes_this_iter: 19
  episodes_total: 10103
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3125223818279448
          entropy_coeff: 0.009999999999999998
          kl: 0.011352305855591808
          policy_loss: -0.03207009709661915
          total_loss: 0.11554123459472543
          vf_explained_var: 0.9784771203994751
          vf_loss: 0.15255366751835459
    num_agent_steps_sampled: 1038960
    num_agent_steps_trained: 1038960
    num_steps_sampled: 1038960
    num_steps_trained: 103

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,520,14140,1038960,9.1682,14.66,3.82,103.93


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1040958
  custom_metrics: {}
  date: 2021-11-09_07-41-32
  done: false
  episode_len_mean: 104.86
  episode_media: {}
  episode_reward_max: 14.660000000000014
  episode_reward_mean: 9.116900000000017
  episode_reward_min: 3.100000000000024
  episodes_this_iter: 18
  episodes_total: 10121
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.285502997466496
          entropy_coeff: 0.009999999999999998
          kl: 0.010670001256316805
          policy_loss: -0.03397948287782215
          total_loss: 0.12107719099592595
          vf_explained_var: 0.9705522656440735
          vf_loss: 0.16022062805436907
    num_agent_steps_sampled: 1040958
    num_agent_steps_trained: 1040958
    num_steps_sampled: 1040958
    num_steps_trained: 1040

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,521,14163.8,1040958,9.1169,14.66,3.1,104.86


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1042956
  custom_metrics: {}
  date: 2021-11-09_07-41-57
  done: false
  episode_len_mean: 105.03
  episode_media: {}
  episode_reward_max: 14.660000000000014
  episode_reward_mean: 8.922600000000019
  episode_reward_min: 3.100000000000024
  episodes_this_iter: 19
  episodes_total: 10140
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3167627238091968
          entropy_coeff: 0.009999999999999998
          kl: 0.01054320919423008
          policy_loss: -0.019457407295703888
          total_loss: 0.17544134280511312
          vf_explained_var: 0.9637347459793091
          vf_loss: 0.20046669638582637
    num_agent_steps_sampled: 1042956
    num_agent_steps_trained: 1042956
    num_steps_sampled: 1042956
    num_steps_trained: 104

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,522,14188.8,1042956,8.9226,14.66,3.1,105.03


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1044954
  custom_metrics: {}
  date: 2021-11-09_07-42-23
  done: false
  episode_len_mean: 104.02
  episode_media: {}
  episode_reward_max: 14.660000000000014
  episode_reward_mean: 8.67140000000002
  episode_reward_min: 2.610000000000009
  episodes_this_iter: 21
  episodes_total: 10161
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.1430746044431415
          entropy_coeff: 0.009999999999999998
          kl: 0.00913860965487973
          policy_loss: -0.07027195887196631
          total_loss: 0.11097053099601041
          vf_explained_var: 0.9737758040428162
          vf_loss: 0.18608600940732728
    num_agent_steps_sampled: 1044954
    num_agent_steps_trained: 1044954
    num_steps_sampled: 1044954
    num_steps_trained: 10449

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,523,14214.9,1044954,8.6714,14.66,2.61,104.02


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1046952
  custom_metrics: {}
  date: 2021-11-09_07-42-48
  done: false
  episode_len_mean: 102.73
  episode_media: {}
  episode_reward_max: 14.670000000000012
  episode_reward_mean: 8.766200000000017
  episode_reward_min: 2.610000000000009
  episodes_this_iter: 20
  episodes_total: 10181
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.196045578661419
          entropy_coeff: 0.009999999999999998
          kl: 0.010803483550389511
          policy_loss: -0.026828008748236157
          total_loss: 0.08658951664609568
          vf_explained_var: 0.9866426587104797
          vf_loss: 0.11759068975668578
    num_agent_steps_sampled: 1046952
    num_agent_steps_trained: 1046952
    num_steps_sampled: 1046952
    num_steps_trained: 104

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,524,14239.6,1046952,8.7662,14.67,2.61,102.73


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1048950
  custom_metrics: {}
  date: 2021-11-09_07-43-12
  done: false
  episode_len_mean: 102.92
  episode_media: {}
  episode_reward_max: 14.670000000000012
  episode_reward_mean: 8.658400000000016
  episode_reward_min: 2.610000000000009
  episodes_this_iter: 18
  episodes_total: 10199
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.30164754986763
          entropy_coeff: 0.009999999999999998
          kl: 0.010879206301524561
          policy_loss: -0.007475979005297025
          total_loss: 0.08584250497764774
          vf_explained_var: 0.9809394478797913
          vf_loss: 0.09849308727397806
    num_agent_steps_sampled: 1048950
    num_agent_steps_trained: 1048950
    num_steps_sampled: 1048950
    num_steps_trained: 1048

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,525,14263.6,1048950,8.6584,14.67,2.61,102.92


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1050948
  custom_metrics: {}
  date: 2021-11-09_07-43-37
  done: false
  episode_len_mean: 101.92
  episode_media: {}
  episode_reward_max: 14.670000000000012
  episode_reward_mean: 8.52910000000002
  episode_reward_min: 2.170000000000018
  episodes_this_iter: 21
  episodes_total: 10220
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.21611683567365
          entropy_coeff: 0.009999999999999998
          kl: 0.01060315038943243
          policy_loss: -0.0029836052940005347
          total_loss: 0.12884738580102012
          vf_explained_var: 0.9806031584739685
          vf_loss: 0.13634927143298445
    num_agent_steps_sampled: 1050948
    num_agent_steps_trained: 1050948
    num_steps_sampled: 1050948
    num_steps_trained: 10509

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,526,14289.1,1050948,8.5291,14.67,2.17,101.92


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1052946
  custom_metrics: {}
  date: 2021-11-09_07-44-03
  done: false
  episode_len_mean: 100.41
  episode_media: {}
  episode_reward_max: 14.670000000000012
  episode_reward_mean: 8.54550000000002
  episode_reward_min: 2.170000000000018
  episodes_this_iter: 20
  episodes_total: 10240
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2535590239933558
          entropy_coeff: 0.009999999999999998
          kl: 0.01439724202899461
          policy_loss: -0.037997076252386686
          total_loss: 0.11835167257647429
          vf_explained_var: 0.9777389764785767
          vf_loss: 0.1585066201254016
    num_agent_steps_sampled: 1052946
    num_agent_steps_trained: 1052946
    num_steps_sampled: 1052946
    num_steps_trained: 10529

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,527,14314.5,1052946,8.5455,14.67,2.17,100.41


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1054944
  custom_metrics: {}
  date: 2021-11-09_07-44-28
  done: false
  episode_len_mean: 102.28
  episode_media: {}
  episode_reward_max: 14.700000000000014
  episode_reward_mean: 8.509500000000019
  episode_reward_min: 2.170000000000018
  episodes_this_iter: 20
  episodes_total: 10260
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3201450591995603
          entropy_coeff: 0.009999999999999998
          kl: 0.01050506567816072
          policy_loss: -0.016372356138059072
          total_loss: 0.10069258065805549
          vf_explained_var: 0.9748987555503845
          vf_loss: 0.12269420228188946
    num_agent_steps_sampled: 1054944
    num_agent_steps_trained: 1054944
    num_steps_sampled: 1054944
    num_steps_trained: 105

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,528,14339.4,1054944,8.5095,14.7,2.17,102.28


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1056942
  custom_metrics: {}
  date: 2021-11-09_07-44-55
  done: false
  episode_len_mean: 100.71
  episode_media: {}
  episode_reward_max: 14.780000000000012
  episode_reward_mean: 8.584400000000018
  episode_reward_min: 2.170000000000018
  episodes_this_iter: 20
  episodes_total: 10280
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.1851507513296036
          entropy_coeff: 0.009999999999999998
          kl: 0.011171001636449426
          policy_loss: -0.01947297959455422
          total_loss: 0.12155022287652606
          vf_explained_var: 0.9835809469223022
          vf_loss: 0.1448225048148916
    num_agent_steps_sampled: 1056942
    num_agent_steps_trained: 1056942
    num_steps_sampled: 1056942
    num_steps_trained: 1056

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,529,14366.8,1056942,8.5844,14.78,2.17,100.71


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1058940
  custom_metrics: {}
  date: 2021-11-09_07-45-20
  done: false
  episode_len_mean: 99.77
  episode_media: {}
  episode_reward_max: 14.780000000000012
  episode_reward_mean: 8.59890000000002
  episode_reward_min: 2.170000000000018
  episodes_this_iter: 20
  episodes_total: 10300
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2570944559006465
          entropy_coeff: 0.009999999999999998
          kl: 0.008756233035629772
          policy_loss: -0.072125640130114
          total_loss: 0.03351811677040089
          vf_explained_var: 0.9805317521095276
          vf_loss: 0.11190309340045565
    num_agent_steps_sampled: 1058940
    num_agent_steps_trained: 1058940
    num_steps_sampled: 1058940
    num_steps_trained: 1058940

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,530,14391.6,1058940,8.5989,14.78,2.17,99.77




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1060938
  custom_metrics: {}
  date: 2021-11-09_07-46-02
  done: false
  episode_len_mean: 99.97
  episode_media: {}
  episode_reward_max: 14.780000000000012
  episode_reward_mean: 8.64790000000002
  episode_reward_min: 3.82000000000002
  episodes_this_iter: 19
  episodes_total: 10319
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3059571039109004
          entropy_coeff: 0.009999999999999998
          kl: 0.01033519548075554
          policy_loss: -0.039181757332491025
          total_loss: 0.09849048759788274
          vf_explained_var: 0.9693161845207214
          vf_loss: 0.14328207249442737
    num_agent_steps_sampled: 1060938
    num_agent_steps_trained: 1060938
    num_steps_sampled: 1060938
    num_steps_trained: 106093

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,531,14434.2,1060938,8.6479,14.78,3.82,99.97


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1062936
  custom_metrics: {}
  date: 2021-11-09_07-46-29
  done: false
  episode_len_mean: 99.63
  episode_media: {}
  episode_reward_max: 14.780000000000012
  episode_reward_mean: 8.823100000000016
  episode_reward_min: 3.0100000000000144
  episodes_this_iter: 21
  episodes_total: 10340
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2312117928550357
          entropy_coeff: 0.009999999999999998
          kl: 0.015666045424478577
          policy_loss: -0.07600178140259925
          total_loss: 0.12290565082359882
          vf_explained_var: 0.9714348912239075
          vf_loss: 0.19992726234098276
    num_agent_steps_sampled: 1062936
    num_agent_steps_trained: 1062936
    num_steps_sampled: 1062936
    num_steps_trained: 106

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,532,14460.6,1062936,8.8231,14.78,3.01,99.63




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1064934
  custom_metrics: {}
  date: 2021-11-09_07-47-11
  done: false
  episode_len_mean: 97.37
  episode_media: {}
  episode_reward_max: 14.780000000000012
  episode_reward_mean: 8.725000000000017
  episode_reward_min: -0.07
  episodes_this_iter: 22
  episodes_total: 10362
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2270041902860005
          entropy_coeff: 0.009999999999999998
          kl: 0.012506289993291566
          policy_loss: -0.015901478034045013
          total_loss: 0.49419116194226914
          vf_explained_var: 0.9364272952079773
          vf_loss: 0.5133479846730119
    num_agent_steps_sampled: 1064934
    num_agent_steps_trained: 1064934
    num_steps_sampled: 1064934
    num_steps_trained: 1064934
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,533,14503.1,1064934,8.725,14.78,-0.07,97.37


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1066932
  custom_metrics: {}
  date: 2021-11-09_07-47-39
  done: false
  episode_len_mean: 97.1
  episode_media: {}
  episode_reward_max: 14.730000000000013
  episode_reward_mean: 8.721600000000018
  episode_reward_min: -0.07
  episodes_this_iter: 20
  episodes_total: 10382
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.1362243782906305
          entropy_coeff: 0.009999999999999998
          kl: 0.016156361896301756
          policy_loss: -0.033076024286094165
          total_loss: 0.19395262299194221
          vf_explained_var: 0.9717828035354614
          vf_loss: 0.2267451787544858
    num_agent_steps_sampled: 1066932
    num_agent_steps_trained: 1066932
    num_steps_sampled: 1066932
    num_steps_trained: 1066932
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,534,14530.5,1066932,8.7216,14.73,-0.07,97.1


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1068930
  custom_metrics: {}
  date: 2021-11-09_07-48-05
  done: false
  episode_len_mean: 96.62
  episode_media: {}
  episode_reward_max: 14.730000000000013
  episode_reward_mean: 8.813300000000018
  episode_reward_min: -0.07
  episodes_this_iter: 22
  episodes_total: 10404
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.1665954379808334
          entropy_coeff: 0.009999999999999998
          kl: 0.008606521286968576
          policy_loss: -0.01327788338419937
          total_loss: 0.09375183051008554
          vf_explained_var: 0.9862886071205139
          vf_loss: 0.11249197531668913
    num_agent_steps_sampled: 1068930
    num_agent_steps_trained: 1068930
    num_steps_sampled: 1068930
    num_steps_trained: 1068930
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,535,14557.1,1068930,8.8133,14.73,-0.07,96.62


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1070928
  custom_metrics: {}
  date: 2021-11-09_07-48-31
  done: false
  episode_len_mean: 96.35
  episode_media: {}
  episode_reward_max: 14.730000000000013
  episode_reward_mean: 9.138800000000018
  episode_reward_min: -0.07
  episodes_this_iter: 19
  episodes_total: 10423
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.1646205254963466
          entropy_coeff: 0.009999999999999998
          kl: 0.009243093117402236
          policy_loss: -0.04495144271779628
          total_loss: 0.07275239422562577
          vf_explained_var: 0.9736900925636292
          vf_loss: 0.12268750195701918
    num_agent_steps_sampled: 1070928
    num_agent_steps_trained: 1070928
    num_steps_sampled: 1070928
    num_steps_trained: 1070928
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,536,14582.6,1070928,9.1388,14.73,-0.07,96.35


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1072926
  custom_metrics: {}
  date: 2021-11-09_07-48-57
  done: false
  episode_len_mean: 95.97
  episode_media: {}
  episode_reward_max: 14.730000000000013
  episode_reward_mean: 9.576600000000019
  episode_reward_min: -0.07
  episodes_this_iter: 21
  episodes_total: 10444
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.179175215959549
          entropy_coeff: 0.009999999999999998
          kl: 0.008770940095384951
          policy_loss: -0.07216532072495846
          total_loss: 0.0038642801433092073
          vf_explained_var: 0.9904115796089172
          vf_loss: 0.08149914522433564
    num_agent_steps_sampled: 1072926
    num_agent_steps_trained: 1072926
    num_steps_sampled: 1072926
    num_steps_trained: 1072926
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,537,14609.1,1072926,9.5766,14.73,-0.07,95.97


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1074924
  custom_metrics: {}
  date: 2021-11-09_07-49-21
  done: false
  episode_len_mean: 98.46
  episode_media: {}
  episode_reward_max: 14.730000000000013
  episode_reward_mean: 9.76740000000002
  episode_reward_min: 3.130000000000009
  episodes_this_iter: 18
  episodes_total: 10462
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2391116482870919
          entropy_coeff: 0.009999999999999998
          kl: 0.010495329223941615
          policy_loss: -0.05150519135807242
          total_loss: 0.04933085344022228
          vf_explained_var: 0.9824755191802979
          vf_loss: 0.10566198989039376
    num_agent_steps_sampled: 1074924
    num_agent_steps_trained: 1074924
    num_steps_sampled: 1074924
    num_steps_trained: 10749

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,538,14632.6,1074924,9.7674,14.73,3.13,98.46


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1076922
  custom_metrics: {}
  date: 2021-11-09_07-49-47
  done: false
  episode_len_mean: 99.51
  episode_media: {}
  episode_reward_max: 14.730000000000013
  episode_reward_mean: 9.496900000000016
  episode_reward_min: 3.920000000000022
  episodes_this_iter: 21
  episodes_total: 10483
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2780751461074467
          entropy_coeff: 0.009999999999999998
          kl: 0.008043441367100492
          policy_loss: -0.054435378313064574
          total_loss: 0.03688675335918864
          vf_explained_var: 0.9836627840995789
          vf_loss: 0.09830506504291579
    num_agent_steps_sampled: 1076922
    num_agent_steps_trained: 1076922
    num_steps_sampled: 1076922
    num_steps_trained: 107

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,539,14658.2,1076922,9.4969,14.73,3.92,99.51


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1078920
  custom_metrics: {}
  date: 2021-11-09_07-50-13
  done: false
  episode_len_mean: 99.9
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 9.311800000000016
  episode_reward_min: 3.5000000000000213
  episodes_this_iter: 20
  episodes_total: 10503
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.269730071794419
          entropy_coeff: 0.009999999999999998
          kl: 0.017238078849888913
          policy_loss: -0.016617246859130404
          total_loss: 0.22852272192637127
          vf_explained_var: 0.9637792706489563
          vf_loss: 0.24541183743803274
    num_agent_steps_sampled: 1078920
    num_agent_steps_trained: 1078920
    num_steps_sampled: 1078920
    num_steps_trained: 1078

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,540,14684.1,1078920,9.3118,14.71,3.5,99.9


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1080918
  custom_metrics: {}
  date: 2021-11-09_07-50-40
  done: false
  episode_len_mean: 99.92
  episode_media: {}
  episode_reward_max: 14.700000000000012
  episode_reward_mean: 9.266400000000019
  episode_reward_min: 3.5000000000000213
  episodes_this_iter: 20
  episodes_total: 10523
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.1409497823034014
          entropy_coeff: 0.009999999999999998
          kl: 0.013170674848438408
          policy_loss: -0.05417180614812034
          total_loss: 0.09661461499830087
          vf_explained_var: 0.9805542230606079
          vf_loss: 0.152702326575915
    num_agent_steps_sampled: 1080918
    num_agent_steps_trained: 1080918
    num_steps_sampled: 1080918
    num_steps_trained: 10809

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,541,14711.3,1080918,9.2664,14.7,3.5,99.92


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1082916
  custom_metrics: {}
  date: 2021-11-09_07-51-06
  done: false
  episode_len_mean: 100.64
  episode_media: {}
  episode_reward_max: 14.700000000000012
  episode_reward_mean: 9.12410000000002
  episode_reward_min: 3.4400000000000226
  episodes_this_iter: 20
  episodes_total: 10543
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.118549698023569
          entropy_coeff: 0.009999999999999998
          kl: 0.012359889029906751
          policy_loss: -0.054339907371572085
          total_loss: 0.04726589938536996
          vf_explained_var: 0.9861155152320862
          vf_loss: 0.1038821337833291
    num_agent_steps_sampled: 1082916
    num_agent_steps_trained: 1082916
    num_steps_sampled: 1082916
    num_steps_trained: 1082

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,542,14737.8,1082916,9.1241,14.7,3.44,100.64


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1084914
  custom_metrics: {}
  date: 2021-11-09_07-51-32
  done: false
  episode_len_mean: 100.54
  episode_media: {}
  episode_reward_max: 14.700000000000012
  episode_reward_mean: 9.126000000000019
  episode_reward_min: 3.4400000000000226
  episodes_this_iter: 19
  episodes_total: 10562
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2156971096992493
          entropy_coeff: 0.009999999999999998
          kl: 0.00990708548619749
          policy_loss: -0.02326853122739565
          total_loss: 0.09285352366665998
          vf_explained_var: 0.9824304580688477
          vf_loss: 0.12113786846221912
    num_agent_steps_sampled: 1084914
    num_agent_steps_trained: 1084914
    num_steps_sampled: 1084914
    num_steps_trained: 108

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,543,14763.2,1084914,9.126,14.7,3.44,100.54


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1086912
  custom_metrics: {}
  date: 2021-11-09_07-51-58
  done: false
  episode_len_mean: 100.62
  episode_media: {}
  episode_reward_max: 14.700000000000012
  episode_reward_mean: 9.264900000000019
  episode_reward_min: 3.4400000000000226
  episodes_this_iter: 20
  episodes_total: 10582
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2686386199224562
          entropy_coeff: 0.009999999999999998
          kl: 0.010063039470355084
          policy_loss: -0.06276469264356863
          total_loss: 0.04348480062825339
          vf_explained_var: 0.9745402932167053
          vf_loss: 0.11168231067380735
    num_agent_steps_sampled: 1086912
    num_agent_steps_trained: 1086912
    num_steps_sampled: 1086912
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,544,14789.2,1086912,9.2649,14.7,3.44,100.62


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1088910
  custom_metrics: {}
  date: 2021-11-09_07-52-24
  done: false
  episode_len_mean: 100.21
  episode_media: {}
  episode_reward_max: 14.700000000000012
  episode_reward_mean: 9.111900000000018
  episode_reward_min: 3.1200000000000156
  episodes_this_iter: 21
  episodes_total: 10603
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.151420529683431
          entropy_coeff: 0.009999999999999998
          kl: 0.0093899127494549
          policy_loss: -0.026944835421939692
          total_loss: 0.10027517950428384
          vf_explained_var: 0.9815118908882141
          vf_loss: 0.13196584876804124
    num_agent_steps_sampled: 1088910
    num_agent_steps_trained: 1088910
    num_steps_sampled: 1088910
    num_steps_trained: 1088

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,545,14815.3,1088910,9.1119,14.7,3.12,100.21


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1090908
  custom_metrics: {}
  date: 2021-11-09_07-52-49
  done: false
  episode_len_mean: 99.5
  episode_media: {}
  episode_reward_max: 14.700000000000012
  episode_reward_mean: 9.002700000000019
  episode_reward_min: 2.490000000000027
  episodes_this_iter: 21
  episodes_total: 10624
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2164947066988263
          entropy_coeff: 0.009999999999999998
          kl: 0.01354723147924594
          policy_loss: -0.04213932092700686
          total_loss: 0.07855692955532244
          vf_explained_var: 0.9739426374435425
          vf_loss: 0.12309617698192596
    num_agent_steps_sampled: 1090908
    num_agent_steps_trained: 1090908
    num_steps_sampled: 1090908
    num_steps_trained: 109090

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,546,14840.7,1090908,9.0027,14.7,2.49,99.5


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1092906
  custom_metrics: {}
  date: 2021-11-09_07-53-16
  done: false
  episode_len_mean: 98.03
  episode_media: {}
  episode_reward_max: 14.680000000000014
  episode_reward_mean: 8.679800000000018
  episode_reward_min: 2.490000000000027
  episodes_this_iter: 21
  episodes_total: 10645
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.1496280812081836
          entropy_coeff: 0.009999999999999998
          kl: 0.01380290144047515
          policy_loss: -0.041624748866472924
          total_loss: 0.08347647321366128
          vf_explained_var: 0.964237630367279
          vf_loss: 0.12664819397032262
    num_agent_steps_sampled: 1092906
    num_agent_steps_trained: 1092906
    num_steps_sampled: 1092906
    num_steps_trained: 10929

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,547,14867.2,1092906,8.6798,14.68,2.49,98.03




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1094904
  custom_metrics: {}
  date: 2021-11-09_07-53-56
  done: false
  episode_len_mean: 97.04
  episode_media: {}
  episode_reward_max: 14.680000000000014
  episode_reward_mean: 8.303300000000016
  episode_reward_min: -0.01
  episodes_this_iter: 21
  episodes_total: 10666
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2049337131636484
          entropy_coeff: 0.009999999999999998
          kl: 0.014452583525187855
          policy_loss: -0.04988464235904671
          total_loss: 0.16584685662583937
          vf_explained_var: 0.9631613492965698
          vf_loss: 0.2173632238769815
    num_agent_steps_sampled: 1094904
    num_agent_steps_trained: 1094904
    num_steps_sampled: 1094904
    num_steps_trained: 1094904
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,548,14907.3,1094904,8.3033,14.68,-0.01,97.04


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1096902
  custom_metrics: {}
  date: 2021-11-09_07-54-22
  done: false
  episode_len_mean: 96.94
  episode_media: {}
  episode_reward_max: 14.680000000000014
  episode_reward_mean: 8.308900000000017
  episode_reward_min: -0.01
  episodes_this_iter: 19
  episodes_total: 10685
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2011679655029661
          entropy_coeff: 0.009999999999999998
          kl: 0.016909427850453143
          policy_loss: -0.017594616292488006
          total_loss: 0.12829647522774482
          vf_explained_var: 0.9744007587432861
          vf_loss: 0.14571423748774187
    num_agent_steps_sampled: 1096902
    num_agent_steps_trained: 1096902
    num_steps_sampled: 1096902
    num_steps_trained: 1096902
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,549,14932.8,1096902,8.3089,14.68,-0.01,96.94




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1098900
  custom_metrics: {}
  date: 2021-11-09_07-55-01
  done: false
  episode_len_mean: 95.8
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 8.658500000000018
  episode_reward_min: -0.02
  episodes_this_iter: 21
  episodes_total: 10706
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.1999406658467793
          entropy_coeff: 0.009999999999999998
          kl: 0.010780626779490276
          policy_loss: -0.031076089736251603
          total_loss: 0.17707314060202667
          vf_explained_var: 0.9743521809577942
          vf_loss: 0.2123778253261532
    num_agent_steps_sampled: 1098900
    num_agent_steps_trained: 1098900
    num_steps_sampled: 1098900
    num_steps_trained: 1098900
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,550,14972,1098900,8.6585,14.72,-0.02,95.8




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1100898
  custom_metrics: {}
  date: 2021-11-09_07-55-43
  done: false
  episode_len_mean: 95.5
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 8.729700000000015
  episode_reward_min: -0.16000000000000003
  episodes_this_iter: 22
  episodes_total: 10728
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.1231322762512026
          entropy_coeff: 0.009999999999999998
          kl: 0.013123210888122641
          policy_loss: -0.012529305652493522
          total_loss: 0.2948206941774558
          vf_explained_var: 0.958781361579895
          vf_loss: 0.3091219393447751
    num_agent_steps_sampled: 1100898
    num_agent_steps_trained: 1100898
    num_steps_sampled: 1100898
    num_steps_trained: 1100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,551,15013.8,1100898,8.7297,14.72,-0.16,95.5


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1102896
  custom_metrics: {}
  date: 2021-11-09_07-56-07
  done: false
  episode_len_mean: 96.72
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 8.819100000000018
  episode_reward_min: -0.16000000000000003
  episodes_this_iter: 20
  episodes_total: 10748
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.1704670315697079
          entropy_coeff: 0.009999999999999998
          kl: 0.010594518174290699
          policy_loss: -0.08379408296729837
          total_loss: 0.06854026420485405
          vf_explained_var: 0.9801188111305237
          vf_loss: 0.15640234895760105
    num_agent_steps_sampled: 1102896
    num_agent_steps_trained: 1102896
    num_steps_sampled: 1102896
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,552,15037.9,1102896,8.8191,14.72,-0.16,96.72


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1104894
  custom_metrics: {}
  date: 2021-11-09_07-56-32
  done: false
  episode_len_mean: 96.87
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 9.000200000000017
  episode_reward_min: -0.16000000000000003
  episodes_this_iter: 20
  episodes_total: 10768
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.1888712746756418
          entropy_coeff: 0.009999999999999998
          kl: 0.014212928512539413
          policy_loss: -0.013008889254359971
          total_loss: 0.19029493017920426
          vf_explained_var: 0.968769907951355
          vf_loss: 0.2049476663981165
    num_agent_steps_sampled: 1104894
    num_agent_steps_trained: 1104894
    num_steps_sampled: 1104894
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,553,15063.3,1104894,9.0002,14.72,-0.16,96.87


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1106892
  custom_metrics: {}
  date: 2021-11-09_07-56-58
  done: false
  episode_len_mean: 96.53
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 9.201900000000018
  episode_reward_min: -0.16000000000000003
  episodes_this_iter: 21
  episodes_total: 10789
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.0900143506981077
          entropy_coeff: 0.009999999999999998
          kl: 0.01600017059990644
          policy_loss: -0.02075664337192263
          total_loss: 0.17834405195677563
          vf_explained_var: 0.9764227271080017
          vf_loss: 0.19846770717274576
    num_agent_steps_sampled: 1106892
    num_agent_steps_trained: 1106892
    num_steps_sampled: 1106892
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,554,15089.2,1106892,9.2019,14.72,-0.16,96.53


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1108890
  custom_metrics: {}
  date: 2021-11-09_07-57-24
  done: false
  episode_len_mean: 97.97
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 9.376100000000015
  episode_reward_min: 0.9399999999999991
  episodes_this_iter: 21
  episodes_total: 10810
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.0682557668004717
          entropy_coeff: 0.009999999999999998
          kl: 0.01169809022795236
          policy_loss: -0.06873271036006155
          total_loss: 0.07666940243826026
          vf_explained_var: 0.9814188480377197
          vf_loss: 0.14765253625810146
    num_agent_steps_sampled: 1108890
    num_agent_steps_trained: 1108890
    num_steps_sampled: 1108890
    num_steps_trained: 1108

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,555,15115.1,1108890,9.3761,14.71,0.94,97.97


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1110888
  custom_metrics: {}
  date: 2021-11-09_07-57-50
  done: false
  episode_len_mean: 97.5
  episode_media: {}
  episode_reward_max: 14.700000000000012
  episode_reward_mean: 9.161000000000017
  episode_reward_min: 0.9399999999999991
  episodes_this_iter: 21
  episodes_total: 10831
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.185209574869701
          entropy_coeff: 0.009999999999999998
          kl: 0.008080834390663448
          policy_loss: -0.08922261106116432
          total_loss: -0.010568709769064473
          vf_explained_var: 0.9868283271789551
          vf_loss: 0.08468122673886162
    num_agent_steps_sampled: 1110888
    num_agent_steps_trained: 1110888
    num_steps_sampled: 1110888
    num_steps_trained: 111

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,556,15141.3,1110888,9.161,14.7,0.94,97.5


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1112886
  custom_metrics: {}
  date: 2021-11-09_07-58-17
  done: false
  episode_len_mean: 97.89
  episode_media: {}
  episode_reward_max: 14.700000000000012
  episode_reward_mean: 8.940900000000017
  episode_reward_min: 0.9399999999999991
  episodes_this_iter: 19
  episodes_total: 10850
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.111620284829821
          entropy_coeff: 0.009999999999999998
          kl: 0.012338435444875222
          policy_loss: -0.0065840943228630796
          total_loss: 0.12003763678173224
          vf_explained_var: 0.9780434966087341
          vf_loss: 0.1288442284400974
    num_agent_steps_sampled: 1112886
    num_agent_steps_trained: 1112886
    num_steps_sampled: 1112886
    num_steps_trained: 111

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,557,15168,1112886,8.9409,14.7,0.94,97.89


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1114884
  custom_metrics: {}
  date: 2021-11-09_07-58-42
  done: false
  episode_len_mean: 97.63
  episode_media: {}
  episode_reward_max: 14.700000000000012
  episode_reward_mean: 8.968800000000018
  episode_reward_min: 0.9399999999999991
  episodes_this_iter: 21
  episodes_total: 10871
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.109944763921556
          entropy_coeff: 0.009999999999999998
          kl: 0.008729448297387556
          policy_loss: -0.0474420781646456
          total_loss: 0.04615315685846976
          vf_explained_var: 0.979447066783905
          vf_loss: 0.09840238298333827
    num_agent_steps_sampled: 1114884
    num_agent_steps_trained: 1114884
    num_steps_sampled: 1114884
    num_steps_trained: 111488

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,558,15193.2,1114884,8.9688,14.7,0.94,97.63


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1116882
  custom_metrics: {}
  date: 2021-11-09_07-59-10
  done: false
  episode_len_mean: 97.44
  episode_media: {}
  episode_reward_max: 14.680000000000014
  episode_reward_mean: 8.871700000000018
  episode_reward_min: 4.050000000000024
  episodes_this_iter: 20
  episodes_total: 10891
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.1131942110402244
          entropy_coeff: 0.009999999999999998
          kl: 0.010279166425215265
          policy_loss: -0.030730456415386426
          total_loss: 0.13238212543406658
          vf_explained_var: 0.9797065258026123
          vf_loss: 0.16683516514798005
    num_agent_steps_sampled: 1116882
    num_agent_steps_trained: 1116882
    num_steps_sampled: 1116882
    num_steps_trained: 111

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,559,15220.6,1116882,8.8717,14.68,4.05,97.44


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1118880
  custom_metrics: {}
  date: 2021-11-09_07-59-36
  done: false
  episode_len_mean: 97.43
  episode_media: {}
  episode_reward_max: 14.680000000000014
  episode_reward_mean: 8.759100000000018
  episode_reward_min: 4.050000000000024
  episodes_this_iter: 21
  episodes_total: 10912
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.1931125638030824
          entropy_coeff: 0.009999999999999998
          kl: 0.015472837846016099
          policy_loss: -0.030756391762267975
          total_loss: 0.08763373821885104
          vf_explained_var: 0.9757179021835327
          vf_loss: 0.11916823356988884
    num_agent_steps_sampled: 1118880
    num_agent_steps_trained: 1118880
    num_steps_sampled: 1118880
    num_steps_trained: 111

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,560,15246.6,1118880,8.7591,14.68,4.05,97.43


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1120878
  custom_metrics: {}
  date: 2021-11-09_08-00-02
  done: false
  episode_len_mean: 97.18
  episode_media: {}
  episode_reward_max: 14.680000000000014
  episode_reward_mean: 8.765600000000017
  episode_reward_min: 3.78000000000002
  episodes_this_iter: 21
  episodes_total: 10933
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.1499791378066653
          entropy_coeff: 0.009999999999999998
          kl: 0.011037428155291917
          policy_loss: -0.03533576175215698
          total_loss: 0.10954555885954982
          vf_explained_var: 0.9786244630813599
          vf_loss: 0.1484251920311224
    num_agent_steps_sampled: 1120878
    num_agent_steps_trained: 1120878
    num_steps_sampled: 1120878
    num_steps_trained: 112087

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,561,15273.1,1120878,8.7656,14.68,3.78,97.18


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1122876
  custom_metrics: {}
  date: 2021-11-09_08-00-28
  done: false
  episode_len_mean: 96.58
  episode_media: {}
  episode_reward_max: 14.680000000000014
  episode_reward_mean: 8.841800000000013
  episode_reward_min: 3.78000000000002
  episodes_this_iter: 20
  episodes_total: 10953
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2265807390213013
          entropy_coeff: 0.009999999999999998
          kl: 0.014158091322215911
          policy_loss: -0.005763029094253268
          total_loss: 0.15307200676983312
          vf_explained_var: 0.9755216836929321
          vf_loss: 0.16089550535238925
    num_agent_steps_sampled: 1122876
    num_agent_steps_trained: 1122876
    num_steps_sampled: 1122876
    num_steps_trained: 1122

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,562,15299.1,1122876,8.8418,14.68,3.78,96.58


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1124874
  custom_metrics: {}
  date: 2021-11-09_08-00-54
  done: false
  episode_len_mean: 96.3
  episode_media: {}
  episode_reward_max: 14.680000000000014
  episode_reward_mean: 8.797000000000017
  episode_reward_min: 3.78000000000002
  episodes_this_iter: 20
  episodes_total: 10973
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.222155671460288
          entropy_coeff: 0.009999999999999998
          kl: 0.010934656796231698
          policy_loss: -0.07321024911389465
          total_loss: 0.02400197529544433
          vf_explained_var: 0.9781283140182495
          vf_loss: 0.10155193798598788
    num_agent_steps_sampled: 1124874
    num_agent_steps_trained: 1124874
    num_steps_sampled: 1124874
    num_steps_trained: 1124874

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,563,15324.8,1124874,8.797,14.68,3.78,96.3


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1126872
  custom_metrics: {}
  date: 2021-11-09_08-01-19
  done: false
  episode_len_mean: 98.47
  episode_media: {}
  episode_reward_max: 14.680000000000014
  episode_reward_mean: 8.471900000000016
  episode_reward_min: 3.78000000000002
  episodes_this_iter: 21
  episodes_total: 10994
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2328336959793453
          entropy_coeff: 0.009999999999999998
          kl: 0.014763346441153211
          policy_loss: -0.09882621985106241
          total_loss: 0.0915580518721115
          vf_explained_var: 0.9656042456626892
          vf_loss: 0.1920709995641595
    num_agent_steps_sampled: 1126872
    num_agent_steps_trained: 1126872
    num_steps_sampled: 1126872
    num_steps_trained: 1126872

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,564,15349.9,1126872,8.4719,14.68,3.78,98.47


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1128870
  custom_metrics: {}
  date: 2021-11-09_08-01-47
  done: false
  episode_len_mean: 96.95
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 8.906100000000016
  episode_reward_min: 2.7700000000000187
  episodes_this_iter: 22
  episodes_total: 11016
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2523648256347293
          entropy_coeff: 0.009999999999999998
          kl: 0.009825697650125894
          policy_loss: -0.00252850635775498
          total_loss: 0.15776293693731228
          vf_explained_var: 0.983879029750824
          vf_loss: 0.16573260036252793
    num_agent_steps_sampled: 1128870
    num_agent_steps_trained: 1128870
    num_steps_sampled: 1128870
    num_steps_trained: 1128

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,565,15377.4,1128870,8.9061,14.72,2.77,96.95




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1130868
  custom_metrics: {}
  date: 2021-11-09_08-02-28
  done: false
  episode_len_mean: 98.44
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 8.545200000000017
  episode_reward_min: 2.7700000000000187
  episodes_this_iter: 19
  episodes_total: 11035
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.1971680101894198
          entropy_coeff: 0.009999999999999998
          kl: 0.01189799337686547
          policy_loss: -0.017777546131539913
          total_loss: 0.18137832581109944
          vf_explained_var: 0.9593068361282349
          vf_loss: 0.20255132225297745
    num_agent_steps_sampled: 1130868
    num_agent_steps_trained: 1130868
    num_steps_sampled: 1130868
    num_steps_trained: 113

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,566,15418.4,1130868,8.5452,14.72,2.77,98.44


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1132866
  custom_metrics: {}
  date: 2021-11-09_08-02-54
  done: false
  episode_len_mean: 97.58
  episode_media: {}
  episode_reward_max: 14.770000000000016
  episode_reward_mean: 8.819800000000019
  episode_reward_min: 2.7700000000000187
  episodes_this_iter: 20
  episodes_total: 11055
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2405942184584482
          entropy_coeff: 0.009999999999999998
          kl: 0.011041575172661905
          policy_loss: -0.027570117264986038
          total_loss: 0.14383365948285376
          vf_explained_var: 0.9799565672874451
          vf_loss: 0.1758508094009899
    num_agent_steps_sampled: 1132866
    num_agent_steps_trained: 1132866
    num_steps_sampled: 1132866
    num_steps_trained: 113

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,567,15444.7,1132866,8.8198,14.77,2.77,97.58




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1134864
  custom_metrics: {}
  date: 2021-11-09_08-03-36
  done: false
  episode_len_mean: 97.05
  episode_media: {}
  episode_reward_max: 14.770000000000016
  episode_reward_mean: 8.680800000000017
  episode_reward_min: -0.07
  episodes_this_iter: 21
  episodes_total: 11076
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.310881505126045
          entropy_coeff: 0.009999999999999998
          kl: 0.008719307028719916
          policy_loss: -0.03013240768618527
          total_loss: 0.10589551071503332
          vf_explained_var: 0.9841346144676208
          vf_loss: 0.14285174377617382
    num_agent_steps_sampled: 1134864
    num_agent_steps_trained: 1134864
    num_steps_sampled: 1134864
    num_steps_trained: 1134864
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,568,15487,1134864,8.6808,14.77,-0.07,97.05


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1136862
  custom_metrics: {}
  date: 2021-11-09_08-04-01
  done: false
  episode_len_mean: 96.53
  episode_media: {}
  episode_reward_max: 14.770000000000016
  episode_reward_mean: 8.732000000000017
  episode_reward_min: -0.07
  episodes_this_iter: 21
  episodes_total: 11097
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.254251408860797
          entropy_coeff: 0.009999999999999998
          kl: 0.01306861441258231
          policy_loss: -0.0015804200388845942
          total_loss: 0.24759219275077893
          vf_explained_var: 0.9591472744941711
          vf_loss: 0.2522950999083973
    num_agent_steps_sampled: 1136862
    num_agent_steps_trained: 1136862
    num_steps_sampled: 1136862
    num_steps_trained: 1136862
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,569,15511.8,1136862,8.732,14.77,-0.07,96.53


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1138860
  custom_metrics: {}
  date: 2021-11-09_08-04-26
  done: false
  episode_len_mean: 98.44
  episode_media: {}
  episode_reward_max: 14.770000000000016
  episode_reward_mean: 8.502300000000018
  episode_reward_min: -0.07
  episodes_this_iter: 20
  episodes_total: 11117
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2945103878066653
          entropy_coeff: 0.009999999999999998
          kl: 0.012386565313414807
          policy_loss: -0.008064204543119385
          total_loss: 0.10562647238728545
          vf_explained_var: 0.9750449657440186
          vf_loss: 0.11770738457285222
    num_agent_steps_sampled: 1138860
    num_agent_steps_trained: 1138860
    num_steps_sampled: 1138860
    num_steps_trained: 1138860
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,570,15536.3,1138860,8.5023,14.77,-0.07,98.44


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1140858
  custom_metrics: {}
  date: 2021-11-09_08-04-53
  done: false
  episode_len_mean: 96.81
  episode_media: {}
  episode_reward_max: 14.770000000000016
  episode_reward_mean: 8.719600000000016
  episode_reward_min: -0.07
  episodes_this_iter: 21
  episodes_total: 11138
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2265705437887282
          entropy_coeff: 0.009999999999999998
          kl: 0.016651337487026737
          policy_loss: -0.02309680956282786
          total_loss: 0.2290974009161194
          vf_explained_var: 0.9727886915206909
          vf_loss: 0.25245741369823615
    num_agent_steps_sampled: 1140858
    num_agent_steps_trained: 1140858
    num_steps_sampled: 1140858
    num_steps_trained: 1140858
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,571,15563.3,1140858,8.7196,14.77,-0.07,96.81


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1142856
  custom_metrics: {}
  date: 2021-11-09_08-05-19
  done: false
  episode_len_mean: 97.65
  episode_media: {}
  episode_reward_max: 14.710000000000012
  episode_reward_mean: 8.467300000000016
  episode_reward_min: -0.07
  episodes_this_iter: 21
  episodes_total: 11159
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3022868082636878
          entropy_coeff: 0.009999999999999998
          kl: 0.01704127863927241
          policy_loss: -0.03846891557886487
          total_loss: 0.2702337888202497
          vf_explained_var: 0.9617610573768616
          vf_loss: 0.3094419985654808
    num_agent_steps_sampled: 1142856
    num_agent_steps_trained: 1142856
    num_steps_sampled: 1142856
    num_steps_trained: 1142856
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,572,15589.9,1142856,8.4673,14.71,-0.07,97.65


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1144854
  custom_metrics: {}
  date: 2021-11-09_08-05-44
  done: false
  episode_len_mean: 98.45
  episode_media: {}
  episode_reward_max: 14.710000000000012
  episode_reward_mean: 8.635900000000017
  episode_reward_min: 1.320000000000007
  episodes_this_iter: 19
  episodes_total: 11178
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2155894313539777
          entropy_coeff: 0.009999999999999998
          kl: 0.012812745842806738
          policy_loss: -0.06593584624074754
          total_loss: 0.12476973460898513
          vf_explained_var: 0.9695593118667603
          vf_loss: 0.19362588363389174
    num_agent_steps_sampled: 1144854
    num_agent_steps_trained: 1144854
    num_steps_sampled: 1144854
    num_steps_trained: 1144

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,573,15614.5,1144854,8.6359,14.71,1.32,98.45


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1146852
  custom_metrics: {}
  date: 2021-11-09_08-06-11
  done: false
  episode_len_mean: 97.37
  episode_media: {}
  episode_reward_max: 14.710000000000012
  episode_reward_mean: 8.775400000000017
  episode_reward_min: 1.320000000000007
  episodes_this_iter: 22
  episodes_total: 11200
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.212841012364342
          entropy_coeff: 0.009999999999999998
          kl: 0.0104673565345143
          policy_loss: -0.033277778558078265
          total_loss: 0.07193362006828899
          vf_explained_var: 0.9802035093307495
          vf_loss: 0.10979480283955732
    num_agent_steps_sampled: 1146852
    num_agent_steps_trained: 1146852
    num_steps_sampled: 1146852
    num_steps_trained: 114685

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,574,15641.3,1146852,8.7754,14.71,1.32,97.37


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1148850
  custom_metrics: {}
  date: 2021-11-09_08-06-37
  done: false
  episode_len_mean: 96.63
  episode_media: {}
  episode_reward_max: 14.660000000000014
  episode_reward_mean: 8.849800000000016
  episode_reward_min: 2.630000000000019
  episodes_this_iter: 20
  episodes_total: 11220
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.256214916138422
          entropy_coeff: 0.009999999999999998
          kl: 0.010511875537661109
          policy_loss: -0.018334892250242686
          total_loss: 0.11242261616779226
          vf_explained_var: 0.9820168614387512
          vf_loss: 0.1357425640381518
    num_agent_steps_sampled: 1148850
    num_agent_steps_trained: 1148850
    num_steps_sampled: 1148850
    num_steps_trained: 11488

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,575,15667.1,1148850,8.8498,14.66,2.63,96.63


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1150848
  custom_metrics: {}
  date: 2021-11-09_08-07-00
  done: false
  episode_len_mean: 99.63
  episode_media: {}
  episode_reward_max: 14.660000000000014
  episode_reward_mean: 8.530600000000018
  episode_reward_min: 1.9900000000000195
  episodes_this_iter: 18
  episodes_total: 11238
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2499564687410991
          entropy_coeff: 0.009999999999999998
          kl: 0.014086590911982192
          policy_loss: -0.019357238053565935
          total_loss: 0.19145376303543646
          vf_explained_var: 0.9575318694114685
          vf_loss: 0.21315676765072913
    num_agent_steps_sampled: 1150848
    num_agent_steps_trained: 1150848
    num_steps_sampled: 1150848
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,576,15690.1,1150848,8.5306,14.66,1.99,99.63


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1152846
  custom_metrics: {}
  date: 2021-11-09_08-07-25
  done: false
  episode_len_mean: 99.9
  episode_media: {}
  episode_reward_max: 14.660000000000014
  episode_reward_mean: 8.591000000000017
  episode_reward_min: 1.3300000000000016
  episodes_this_iter: 20
  episodes_total: 11258
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2479133872758774
          entropy_coeff: 0.009999999999999998
          kl: 0.010587978012012004
          policy_loss: -0.030192634237131904
          total_loss: 0.08132267039091814
          vf_explained_var: 0.9767553210258484
          vf_loss: 0.11636248481947752
    num_agent_steps_sampled: 1152846
    num_agent_steps_trained: 1152846
    num_steps_sampled: 1152846
    num_steps_trained: 115

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,577,15715.7,1152846,8.591,14.66,1.33,99.9


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1154844
  custom_metrics: {}
  date: 2021-11-09_08-07-52
  done: false
  episode_len_mean: 99.2
  episode_media: {}
  episode_reward_max: 14.660000000000013
  episode_reward_mean: 8.664300000000019
  episode_reward_min: 1.3300000000000016
  episodes_this_iter: 21
  episodes_total: 11279
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.229554318530219
          entropy_coeff: 0.009999999999999998
          kl: 0.016557797403242053
          policy_loss: -0.006706793322449639
          total_loss: 0.17160867037517682
          vf_explained_var: 0.9756263494491577
          vf_loss: 0.17867593006009147
    num_agent_steps_sampled: 1154844
    num_agent_steps_trained: 1154844
    num_steps_sampled: 1154844
    num_steps_trained: 1154

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,578,15742.7,1154844,8.6643,14.66,1.33,99.2


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1156842
  custom_metrics: {}
  date: 2021-11-09_08-08-19
  done: false
  episode_len_mean: 99.14
  episode_media: {}
  episode_reward_max: 14.740000000000013
  episode_reward_mean: 8.841900000000017
  episode_reward_min: 1.3300000000000016
  episodes_this_iter: 22
  episodes_total: 11301
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.1765087644259136
          entropy_coeff: 0.009999999999999998
          kl: 0.010686575526623375
          policy_loss: -0.05879766469200452
          total_loss: 0.08633443895017817
          vf_explained_var: 0.9779351353645325
          vf_loss: 0.14919416939928418
    num_agent_steps_sampled: 1156842
    num_agent_steps_trained: 1156842
    num_steps_sampled: 1156842
    num_steps_trained: 115

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,579,15769.2,1156842,8.8419,14.74,1.33,99.14


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1158840
  custom_metrics: {}
  date: 2021-11-09_08-08-45
  done: false
  episode_len_mean: 100.22
  episode_media: {}
  episode_reward_max: 14.740000000000013
  episode_reward_mean: 8.636700000000017
  episode_reward_min: 0.8600000000000059
  episodes_this_iter: 19
  episodes_total: 11320
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2081686417261759
          entropy_coeff: 0.009999999999999998
          kl: 0.013227539862893321
          policy_loss: -0.06354603643218676
          total_loss: 0.15919230534207254
          vf_explained_var: 0.973554253578186
          vf_loss: 0.22528544637773718
    num_agent_steps_sampled: 1158840
    num_agent_steps_trained: 1158840
    num_steps_sampled: 1158840
    num_steps_trained: 115

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,580,15795.2,1158840,8.6367,14.74,0.86,100.22


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1160838
  custom_metrics: {}
  date: 2021-11-09_08-09-08
  done: false
  episode_len_mean: 100.13
  episode_media: {}
  episode_reward_max: 14.740000000000013
  episode_reward_mean: 8.667200000000015
  episode_reward_min: 0.8600000000000059
  episodes_this_iter: 17
  episodes_total: 11337
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2825754727636065
          entropy_coeff: 0.009999999999999998
          kl: 0.01257599439837944
          policy_loss: -0.03259665391274861
          total_loss: 0.11966072961333252
          vf_explained_var: 0.9658541679382324
          vf_loss: 0.1560181984944003
    num_agent_steps_sampled: 1160838
    num_agent_steps_trained: 1160838
    num_steps_sampled: 1160838
    num_steps_trained: 1160

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,581,15818.4,1160838,8.6672,14.74,0.86,100.13


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1162836
  custom_metrics: {}
  date: 2021-11-09_08-09-34
  done: false
  episode_len_mean: 100.14
  episode_media: {}
  episode_reward_max: 14.740000000000013
  episode_reward_mean: 8.668400000000016
  episode_reward_min: 0.8600000000000059
  episodes_this_iter: 21
  episodes_total: 11358
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2019055057139623
          entropy_coeff: 0.009999999999999998
          kl: 0.013594934574723928
          policy_loss: -0.0027881018462635222
          total_loss: 0.36108723653347363
          vf_explained_var: 0.9585960507392883
          vf_loss: 0.36609498524949663
    num_agent_steps_sampled: 1162836
    num_agent_steps_trained: 1162836
    num_steps_sampled: 1162836
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,582,15844.1,1162836,8.6684,14.74,0.86,100.14




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1164834
  custom_metrics: {}
  date: 2021-11-09_08-10-19
  done: false
  episode_len_mean: 99.03
  episode_media: {}
  episode_reward_max: 14.750000000000012
  episode_reward_mean: 8.511400000000018
  episode_reward_min: -0.03
  episodes_this_iter: 23
  episodes_total: 11381
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2293853634879703
          entropy_coeff: 0.009999999999999998
          kl: 0.011417585142053963
          policy_loss: -0.11314159502230939
          total_loss: 0.11457300783977623
          vf_explained_var: 0.9689325094223022
          vf_loss: 0.2317785130370231
    num_agent_steps_sampled: 1164834
    num_agent_steps_trained: 1164834
    num_steps_sampled: 1164834
    num_steps_trained: 1164834
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,583,15889.4,1164834,8.5114,14.75,-0.03,99.03


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1166832
  custom_metrics: {}
  date: 2021-11-09_08-10-46
  done: false
  episode_len_mean: 98.94
  episode_media: {}
  episode_reward_max: 14.750000000000012
  episode_reward_mean: 8.214400000000017
  episode_reward_min: -0.03
  episodes_this_iter: 20
  episodes_total: 11401
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2794532276335218
          entropy_coeff: 0.009999999999999998
          kl: 0.01337932516832524
          policy_loss: -0.07034727508823077
          total_loss: 0.09614722098977793
          vf_explained_var: 0.9695448875427246
          vf_loss: 0.1696450375020504
    num_agent_steps_sampled: 1166832
    num_agent_steps_trained: 1166832
    num_steps_sampled: 1166832
    num_steps_trained: 1166832
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,584,15915.9,1166832,8.2144,14.75,-0.03,98.94




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1168830
  custom_metrics: {}
  date: 2021-11-09_08-11-23
  done: false
  episode_len_mean: 98.23
  episode_media: {}
  episode_reward_max: 14.750000000000012
  episode_reward_mean: 8.385100000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 11421
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2547413684072948
          entropy_coeff: 0.009999999999999998
          kl: 0.009436446555038398
          policy_loss: -0.09305353920374598
          total_loss: 0.04251474084421283
          vf_explained_var: 0.9756455421447754
          vf_loss: 0.14131377985080082
    num_agent_steps_sampled: 1168830
    num_agent_steps_trained: 1168830
    num_steps_sampled: 1168830
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,585,15952.9,1168830,8.3851,14.75,-0.06,98.23




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1170828
  custom_metrics: {}
  date: 2021-11-09_08-12-03
  done: false
  episode_len_mean: 94.9
  episode_media: {}
  episode_reward_max: 14.750000000000012
  episode_reward_mean: 8.584700000000016
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 11442
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.325715292635418
          entropy_coeff: 0.009999999999999998
          kl: 0.009186123164908907
          policy_loss: -0.08644554889982654
          total_loss: 0.11345042998769454
          vf_explained_var: 0.956781268119812
          vf_loss: 0.20653165497240566
    num_agent_steps_sampled: 1170828
    num_agent_steps_trained: 1170828
    num_steps_sampled: 1170828
    num_steps_trained: 117

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,586,15993.2,1170828,8.5847,14.75,-0.06,94.9


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1172826
  custom_metrics: {}
  date: 2021-11-09_08-12-31
  done: false
  episode_len_mean: 93.91
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 8.614000000000015
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 22
  episodes_total: 11464
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2356121233531407
          entropy_coeff: 0.009999999999999998
          kl: 0.01564862659729151
          policy_loss: -0.010710703457395237
          total_loss: 0.22066519229362408
          vf_explained_var: 0.9725030660629272
          vf_loss: 0.2324522863186541
    num_agent_steps_sampled: 1172826
    num_agent_steps_trained: 1172826
    num_steps_sampled: 1172826
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,587,16020.7,1172826,8.614,14.72,-0.06,93.91


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1174824
  custom_metrics: {}
  date: 2021-11-09_08-12-54
  done: false
  episode_len_mean: 97.97
  episode_media: {}
  episode_reward_max: 14.780000000000012
  episode_reward_mean: 8.584600000000016
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 11483
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3416056735174997
          entropy_coeff: 0.009999999999999998
          kl: 0.014049330448570783
          policy_loss: -0.025090493669822102
          total_loss: 0.2797824927383945
          vf_explained_var: 0.9498615860939026
          vf_loss: 0.30816210288377033
    num_agent_steps_sampled: 1174824
    num_agent_steps_trained: 1174824
    num_steps_sampled: 1174824
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,588,16044.1,1174824,8.5846,14.78,-0.06,97.97


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1176822
  custom_metrics: {}
  date: 2021-11-09_08-13-20
  done: false
  episode_len_mean: 98.34
  episode_media: {}
  episode_reward_max: 14.780000000000012
  episode_reward_mean: 8.669300000000018
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 11502
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.25374234801247
          entropy_coeff: 0.009999999999999998
          kl: 0.009870591461370653
          policy_loss: 0.004632343316362018
          total_loss: 0.1499114258126134
          vf_explained_var: 0.9779947400093079
          vf_loss: 0.1507016537444932
    num_agent_steps_sampled: 1176822
    num_agent_steps_trained: 1176822
    num_steps_sampled: 1176822
    num_steps_trained: 1176

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,589,16070.1,1176822,8.6693,14.78,-0.06,98.34


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1178820
  custom_metrics: {}
  date: 2021-11-09_08-13-46
  done: false
  episode_len_mean: 98.0
  episode_media: {}
  episode_reward_max: 14.780000000000014
  episode_reward_mean: 8.860500000000018
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 11523
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2458668277377174
          entropy_coeff: 0.009999999999999998
          kl: 0.010775609476583464
          policy_loss: 0.008329662999936512
          total_loss: 0.1646213180252484
          vf_explained_var: 0.983469545841217
          vf_loss: 0.16098312382541952
    num_agent_steps_sampled: 1178820
    num_agent_steps_trained: 1178820
    num_steps_sampled: 1178820
    num_steps_trained: 117

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,590,16096.4,1178820,8.8605,14.78,-0.06,98


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1180818
  custom_metrics: {}
  date: 2021-11-09_08-14-11
  done: false
  episode_len_mean: 99.88
  episode_media: {}
  episode_reward_max: 14.780000000000014
  episode_reward_mean: 9.064700000000018
  episode_reward_min: 3.940000000000019
  episodes_this_iter: 20
  episodes_total: 11543
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.283076254526774
          entropy_coeff: 0.009999999999999998
          kl: 0.00783134667473679
          policy_loss: -0.06132342472140278
          total_loss: 0.037316318840852805
          vf_explained_var: 0.9828341007232666
          vf_loss: 0.10582556937422072
    num_agent_steps_sampled: 1180818
    num_agent_steps_trained: 1180818
    num_steps_sampled: 1180818
    num_steps_trained: 11808

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,591,16121.2,1180818,9.0647,14.78,3.94,99.88


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1182816
  custom_metrics: {}
  date: 2021-11-09_08-14-37
  done: false
  episode_len_mean: 101.4
  episode_media: {}
  episode_reward_max: 14.870000000000013
  episode_reward_mean: 9.015500000000017
  episode_reward_min: 3.8700000000000276
  episodes_this_iter: 20
  episodes_total: 11563
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2606658634685335
          entropy_coeff: 0.009999999999999998
          kl: 0.017106901156946464
          policy_loss: -0.047630026670438905
          total_loss: 0.11598009958508469
          vf_explained_var: 0.9793890714645386
          vf_loss: 0.1638859088044791
    num_agent_steps_sampled: 1182816
    num_agent_steps_trained: 1182816
    num_steps_sampled: 1182816
    num_steps_trained: 118

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,592,16146.5,1182816,9.0155,14.87,3.87,101.4


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1184814
  custom_metrics: {}
  date: 2021-11-09_08-15-00
  done: false
  episode_len_mean: 101.77
  episode_media: {}
  episode_reward_max: 14.870000000000013
  episode_reward_mean: 8.98870000000002
  episode_reward_min: 3.8700000000000276
  episodes_this_iter: 17
  episodes_total: 11580
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.266347511893227
          entropy_coeff: 0.009999999999999998
          kl: 0.006544807724119312
          policy_loss: -0.11705489563090461
          total_loss: -0.04243711508100941
          vf_explained_var: 0.976871907711029
          vf_loss: 0.08256367542559193
    num_agent_steps_sampled: 1184814
    num_agent_steps_trained: 1184814
    num_steps_sampled: 1184814
    num_steps_trained: 1184

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,593,16170,1184814,8.9887,14.87,3.87,101.77


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1186812
  custom_metrics: {}
  date: 2021-11-09_08-15-26
  done: false
  episode_len_mean: 101.67
  episode_media: {}
  episode_reward_max: 14.870000000000013
  episode_reward_mean: 8.868300000000017
  episode_reward_min: 3.8700000000000276
  episodes_this_iter: 21
  episodes_total: 11601
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.236019470010485
          entropy_coeff: 0.009999999999999998
          kl: 0.011052569961059805
          policy_loss: -0.045649270145666034
          total_loss: 0.05834439465155204
          vf_explained_var: 0.9822437167167664
          vf_loss: 0.10838702330809263
    num_agent_steps_sampled: 1186812
    num_agent_steps_trained: 1186812
    num_steps_sampled: 1186812
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,594,16195.7,1186812,8.8683,14.87,3.87,101.67


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1188810
  custom_metrics: {}
  date: 2021-11-09_08-15-49
  done: false
  episode_len_mean: 103.79
  episode_media: {}
  episode_reward_max: 14.870000000000013
  episode_reward_mean: 8.523800000000017
  episode_reward_min: 3.8700000000000276
  episodes_this_iter: 18
  episodes_total: 11619
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.252519206773667
          entropy_coeff: 0.009999999999999998
          kl: 0.0170087908002479
          policy_loss: -0.02996995303602446
          total_loss: 0.1671207610429043
          vf_explained_var: 0.9636685252189636
          vf_loss: 0.1973557464955818
    num_agent_steps_sampled: 1188810
    num_agent_steps_trained: 1188810
    num_steps_sampled: 1188810
    num_steps_trained: 1188810

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,595,16218.5,1188810,8.5238,14.87,3.87,103.79


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1190808
  custom_metrics: {}
  date: 2021-11-09_08-16-14
  done: false
  episode_len_mean: 105.13
  episode_media: {}
  episode_reward_max: 14.870000000000013
  episode_reward_mean: 8.312900000000019
  episode_reward_min: 3.8700000000000276
  episodes_this_iter: 19
  episodes_total: 11638
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2560804849579221
          entropy_coeff: 0.009999999999999998
          kl: 0.012650307943018366
          policy_loss: -0.06597449208299319
          total_loss: 0.09537678439879702
          vf_explained_var: 0.9651963114738464
          vf_loss: 0.16479357509385972
    num_agent_steps_sampled: 1190808
    num_agent_steps_trained: 1190808
    num_steps_sampled: 1190808
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,596,16243.6,1190808,8.3129,14.87,3.87,105.13


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1192806
  custom_metrics: {}
  date: 2021-11-09_08-16-39
  done: false
  episode_len_mean: 104.88
  episode_media: {}
  episode_reward_max: 14.850000000000012
  episode_reward_mean: 8.481600000000018
  episode_reward_min: 3.9200000000000252
  episodes_this_iter: 19
  episodes_total: 11657
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2683675368626912
          entropy_coeff: 0.009999999999999998
          kl: 0.010050105375776072
          policy_loss: -0.07570912476096835
          total_loss: 0.054231782701043854
          vf_explained_var: 0.9834554195404053
          vf_loss: 0.135380338283167
    num_agent_steps_sampled: 1192806
    num_agent_steps_trained: 1192806
    num_steps_sampled: 1192806
    num_steps_trained: 119

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,597,16268.6,1192806,8.4816,14.85,3.92,104.88


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1194804
  custom_metrics: {}
  date: 2021-11-09_08-17-04
  done: false
  episode_len_mean: 103.44
  episode_media: {}
  episode_reward_max: 14.850000000000012
  episode_reward_mean: 8.640300000000018
  episode_reward_min: 3.9200000000000252
  episodes_this_iter: 20
  episodes_total: 11677
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3325378173873539
          entropy_coeff: 0.009999999999999998
          kl: 0.00992387427293198
          policy_loss: -0.03600194157943839
          total_loss: 0.03878304967213245
          vf_explained_var: 0.9898245930671692
          vf_loss: 0.0809571103592004
    num_agent_steps_sampled: 1194804
    num_agent_steps_trained: 1194804
    num_steps_sampled: 1194804
    num_steps_trained: 1194

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,598,16293.9,1194804,8.6403,14.85,3.92,103.44


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1196802
  custom_metrics: {}
  date: 2021-11-09_08-17-28
  done: false
  episode_len_mean: 103.98
  episode_media: {}
  episode_reward_max: 14.850000000000012
  episode_reward_mean: 8.669700000000017
  episode_reward_min: 3.9200000000000252
  episodes_this_iter: 19
  episodes_total: 11696
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2695517869222732
          entropy_coeff: 0.009999999999999998
          kl: 0.015443643223734461
          policy_loss: -0.08381520338090402
          total_loss: 0.04689810343441509
          vf_explained_var: 0.9754631519317627
          vf_loss: 0.13227684747959886
    num_agent_steps_sampled: 1196802
    num_agent_steps_trained: 1196802
    num_steps_sampled: 1196802
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,599,16317.8,1196802,8.6697,14.85,3.92,103.98


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1198800
  custom_metrics: {}
  date: 2021-11-09_08-17-54
  done: false
  episode_len_mean: 103.05
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 8.785200000000017
  episode_reward_min: 3.9200000000000252
  episodes_this_iter: 20
  episodes_total: 11716
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.303409613314129
          entropy_coeff: 0.009999999999999998
          kl: 0.013575976232199548
          policy_loss: 0.0038519746845676783
          total_loss: 0.1369843549405535
          vf_explained_var: 0.9800182580947876
          vf_loss: 0.13638073810864063
    num_agent_steps_sampled: 1198800
    num_agent_steps_trained: 1198800
    num_steps_sampled: 1198800
    num_steps_trained: 119

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,600,16343.2,1198800,8.7852,14.71,3.92,103.05




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1200798
  custom_metrics: {}
  date: 2021-11-09_08-18-32
  done: false
  episode_len_mean: 104.07
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 8.771600000000017
  episode_reward_min: 3.9200000000000252
  episodes_this_iter: 19
  episodes_total: 11735
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3613951705750964
          entropy_coeff: 0.009999999999999998
          kl: 0.011087387550959383
          policy_loss: -0.012078646712359928
          total_loss: 0.09706137631798074
          vf_explained_var: 0.9780488610267639
          vf_loss: 0.11476204052035298
    num_agent_steps_sampled: 1200798
    num_agent_steps_trained: 1200798
    num_steps_sampled: 1200798
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,601,16381.6,1200798,8.7716,14.71,3.92,104.07


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1202796
  custom_metrics: {}
  date: 2021-11-09_08-18-59
  done: false
  episode_len_mean: 103.75
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 8.990200000000018
  episode_reward_min: 3.9300000000000264
  episodes_this_iter: 19
  episodes_total: 11754
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2996774713198345
          entropy_coeff: 0.009999999999999998
          kl: 0.012234850658557722
          policy_loss: -0.03783887880189078
          total_loss: 0.08592387955813181
          vf_explained_var: 0.9824906587600708
          vf_loss: 0.1279404919062342
    num_agent_steps_sampled: 1202796
    num_agent_steps_trained: 1202796
    num_steps_sampled: 1202796
    num_steps_trained: 120

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,602,16408.9,1202796,8.9902,14.71,3.93,103.75




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1204794
  custom_metrics: {}
  date: 2021-11-09_08-19-38
  done: false
  episode_len_mean: 103.13
  episode_media: {}
  episode_reward_max: 14.640000000000015
  episode_reward_mean: 8.738300000000018
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 11774
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3379359869729905
          entropy_coeff: 0.009999999999999998
          kl: 0.016331907889772766
          policy_loss: -0.00167775982547374
          total_loss: 0.31654432574730546
          vf_explained_var: 0.9607222676277161
          vf_loss: 0.3198291964119389
    num_agent_steps_sampled: 1204794
    num_agent_steps_trained: 1204794
    num_steps_sampled: 1204794
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,603,16447.4,1204794,8.7383,14.64,-0.06,103.13




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1206792
  custom_metrics: {}
  date: 2021-11-09_08-20-17
  done: false
  episode_len_mean: 100.9
  episode_media: {}
  episode_reward_max: 14.740000000000013
  episode_reward_mean: 8.944800000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 22
  episodes_total: 11796
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3255388435863313
          entropy_coeff: 0.009999999999999998
          kl: 0.012947364114134023
          policy_loss: -0.01470340472601709
          total_loss: 0.49971890409610104
          vf_explained_var: 0.9380393624305725
          vf_loss: 0.5183450663728374
    num_agent_steps_sampled: 1206792
    num_agent_steps_trained: 1206792
    num_steps_sampled: 1206792
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,604,16486.2,1206792,8.9448,14.74,-0.06,100.9


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1208790
  custom_metrics: {}
  date: 2021-11-09_08-20-44
  done: false
  episode_len_mean: 100.65
  episode_media: {}
  episode_reward_max: 14.740000000000013
  episode_reward_mean: 9.206900000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 11815
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3255429563068208
          entropy_coeff: 0.009999999999999998
          kl: 0.010474564180351075
          policy_loss: -0.013407075458339282
          total_loss: 0.11360417489139807
          vf_explained_var: 0.9829782247543335
          vf_loss: 0.13271647705031292
    num_agent_steps_sampled: 1208790
    num_agent_steps_trained: 1208790
    num_steps_sampled: 1208790
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,605,16513,1208790,9.2069,14.74,-0.06,100.65


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1210788
  custom_metrics: {}
  date: 2021-11-09_08-21-10
  done: false
  episode_len_mean: 99.24
  episode_media: {}
  episode_reward_max: 14.740000000000013
  episode_reward_mean: 9.348700000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 11836
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.32356158608482
          entropy_coeff: 0.009999999999999998
          kl: 0.009321907184005388
          policy_loss: -0.02062250854713576
          total_loss: 0.08662578416544767
          vf_explained_var: 0.9833887219429016
          vf_loss: 0.11376455571679842
    num_agent_steps_sampled: 1210788
    num_agent_steps_trained: 1210788
    num_steps_sampled: 1210788
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,606,16539.3,1210788,9.3487,14.74,-0.06,99.24


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1212786
  custom_metrics: {}
  date: 2021-11-09_08-21-36
  done: false
  episode_len_mean: 98.12
  episode_media: {}
  episode_reward_max: 14.740000000000013
  episode_reward_mean: 8.96250000000002
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 11856
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3317113447756994
          entropy_coeff: 0.009999999999999998
          kl: 0.016014919219598446
          policy_loss: -0.12007804864219257
          total_loss: 0.03841071946635133
          vf_explained_var: 0.9795452952384949
          vf_loss: 0.1602621178187075
    num_agent_steps_sampled: 1212786
    num_agent_steps_trained: 1212786
    num_steps_sampled: 1212786
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,607,16565.1,1212786,8.9625,14.74,-0.06,98.12


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1214784
  custom_metrics: {}
  date: 2021-11-09_08-22-01
  done: false
  episode_len_mean: 98.2
  episode_media: {}
  episode_reward_max: 14.740000000000013
  episode_reward_mean: 9.187700000000019
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 11875
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.287409024011521
          entropy_coeff: 0.009999999999999998
          kl: 0.009245091251705924
          policy_loss: -0.03965367300524598
          total_loss: 0.0591152681126481
          vf_explained_var: 0.9857556819915771
          vf_loss: 0.10497904874916587
    num_agent_steps_sampled: 1214784
    num_agent_steps_trained: 1214784
    num_steps_sampled: 1214784
    num_steps_trained: 121

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,608,16589.9,1214784,9.1877,14.74,-0.06,98.2


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1216782
  custom_metrics: {}
  date: 2021-11-09_08-22-27
  done: false
  episode_len_mean: 99.69
  episode_media: {}
  episode_reward_max: 14.740000000000014
  episode_reward_mean: 9.063700000000019
  episode_reward_min: 4.260000000000024
  episodes_this_iter: 20
  episodes_total: 11895
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3534018039703368
          entropy_coeff: 0.009999999999999998
          kl: 0.008400210989836664
          policy_loss: 0.0015835936935175033
          total_loss: 0.0438487762053098
          vf_explained_var: 0.9932838082313538
          vf_loss: 0.04974422014894939
    num_agent_steps_sampled: 1216782
    num_agent_steps_trained: 1216782
    num_steps_sampled: 1216782
    num_steps_trained: 1216

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,609,16616.2,1216782,9.0637,14.74,4.26,99.69


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1218780
  custom_metrics: {}
  date: 2021-11-09_08-22-52
  done: false
  episode_len_mean: 99.16
  episode_media: {}
  episode_reward_max: 14.780000000000014
  episode_reward_mean: 8.933900000000017
  episode_reward_min: 4.260000000000024
  episodes_this_iter: 21
  episodes_total: 11916
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2869734292938595
          entropy_coeff: 0.009999999999999998
          kl: 0.011293992615096268
          policy_loss: -0.02116825899020547
          total_loss: 0.08384563069613207
          vf_explained_var: 0.9828331470489502
          vf_loss: 0.10974276851685275
    num_agent_steps_sampled: 1218780
    num_agent_steps_trained: 1218780
    num_steps_sampled: 1218780
    num_steps_trained: 1218

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,610,16641,1218780,8.9339,14.78,4.26,99.16


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1220778
  custom_metrics: {}
  date: 2021-11-09_08-23-18
  done: false
  episode_len_mean: 99.88
  episode_media: {}
  episode_reward_max: 14.780000000000014
  episode_reward_mean: 8.771500000000017
  episode_reward_min: 4.280000000000023
  episodes_this_iter: 20
  episodes_total: 11936
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3372842771666391
          entropy_coeff: 0.009999999999999998
          kl: 0.010385735098632236
          policy_loss: -0.04039885603955814
          total_loss: 0.05219663934161266
          vf_explained_var: 0.9855237007141113
          vf_loss: 0.09848216492682696
    num_agent_steps_sampled: 1220778
    num_agent_steps_trained: 1220778
    num_steps_sampled: 1220778
    num_steps_trained: 1220

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,611,16666.7,1220778,8.7715,14.78,4.28,99.88


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1222776
  custom_metrics: {}
  date: 2021-11-09_08-23-42
  done: false
  episode_len_mean: 100.71
  episode_media: {}
  episode_reward_max: 14.820000000000013
  episode_reward_mean: 8.802500000000018
  episode_reward_min: 4.280000000000023
  episodes_this_iter: 18
  episodes_total: 11954
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3415996994291033
          entropy_coeff: 0.009999999999999998
          kl: 0.012873443411758246
          policy_loss: 0.03333962935776938
          total_loss: 0.1269429067948035
          vf_explained_var: 0.9823428392410278
          vf_loss: 0.09773992945750555
    num_agent_steps_sampled: 1222776
    num_agent_steps_trained: 1222776
    num_steps_sampled: 1222776
    num_steps_trained: 12227

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,612,16691,1222776,8.8025,14.82,4.28,100.71


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1224774
  custom_metrics: {}
  date: 2021-11-09_08-24-06
  done: false
  episode_len_mean: 101.6
  episode_media: {}
  episode_reward_max: 14.890000000000011
  episode_reward_mean: 8.674000000000017
  episode_reward_min: 3.800000000000022
  episodes_this_iter: 21
  episodes_total: 11975
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3951383602051508
          entropy_coeff: 0.009999999999999998
          kl: 0.009472101293147971
          policy_loss: -0.03439628332853317
          total_loss: 0.019542882254435904
          vf_explained_var: 0.9915066957473755
          vf_loss: 0.06106293484391201
    num_agent_steps_sampled: 1224774
    num_agent_steps_trained: 1224774
    num_steps_sampled: 1224774
    num_steps_trained: 122

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,613,16715.4,1224774,8.674,14.89,3.8,101.6


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1226772
  custom_metrics: {}
  date: 2021-11-09_08-24-32
  done: false
  episode_len_mean: 101.39
  episode_media: {}
  episode_reward_max: 14.890000000000011
  episode_reward_mean: 8.834400000000018
  episode_reward_min: 3.800000000000022
  episodes_this_iter: 19
  episodes_total: 11994
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3589021319434755
          entropy_coeff: 0.009999999999999998
          kl: 0.008525342608613396
          policy_loss: -0.10235122768651872
          total_loss: -0.00928196974453472
          vf_explained_var: 0.9803726077079773
          vf_loss: 0.10051310120948724
    num_agent_steps_sampled: 1226772
    num_agent_steps_trained: 1226772
    num_steps_sampled: 1226772
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,614,16741.4,1226772,8.8344,14.89,3.8,101.39


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1228770
  custom_metrics: {}
  date: 2021-11-09_08-24-59
  done: false
  episode_len_mean: 101.26
  episode_media: {}
  episode_reward_max: 14.890000000000011
  episode_reward_mean: 8.632700000000018
  episode_reward_min: 2.930000000000011
  episodes_this_iter: 21
  episodes_total: 12015
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.72081298828125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3489574403989883
          entropy_coeff: 0.009999999999999998
          kl: 0.02241078035136297
          policy_loss: -0.011188921758106776
          total_loss: 0.17491353595008452
          vf_explained_var: 0.9743120670318604
          vf_loss: 0.18343805209511801
    num_agent_steps_sampled: 1228770
    num_agent_steps_trained: 1228770
    num_steps_sampled: 1228770
    num_steps_trained: 122

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,615,16768.4,1228770,8.6327,14.89,2.93,101.26


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1230768
  custom_metrics: {}
  date: 2021-11-09_08-25-25
  done: false
  episode_len_mean: 100.19
  episode_media: {}
  episode_reward_max: 14.890000000000011
  episode_reward_mean: 8.71730000000002
  episode_reward_min: 2.930000000000011
  episodes_this_iter: 20
  episodes_total: 12035
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.283601290839059
          entropy_coeff: 0.009999999999999998
          kl: 0.014635536565390515
          policy_loss: -0.02218578244958605
          total_loss: 0.2392082364608844
          vf_explained_var: 0.9675130844116211
          vf_loss: 0.2584058028956254
    num_agent_steps_sampled: 1230768
    num_agent_steps_trained: 1230768
    num_steps_sampled: 1230768
    num_steps_trained: 12307

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,616,16793.9,1230768,8.7173,14.89,2.93,100.19


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1232766
  custom_metrics: {}
  date: 2021-11-09_08-25-52
  done: false
  episode_len_mean: 98.4
  episode_media: {}
  episode_reward_max: 14.890000000000011
  episode_reward_mean: 8.985800000000017
  episode_reward_min: 2.930000000000011
  episodes_this_iter: 21
  episodes_total: 12056
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.254894391127995
          entropy_coeff: 0.009999999999999998
          kl: 0.006907387903316812
          policy_loss: -0.006019060961192563
          total_loss: 0.058138880515027615
          vf_explained_var: 0.9918188452720642
          vf_loss: 0.06923848097877842
    num_agent_steps_sampled: 1232766
    num_agent_steps_trained: 1232766
    num_steps_sampled: 1232766
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,617,16820.8,1232766,8.9858,14.89,2.93,98.4


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1234764
  custom_metrics: {}
  date: 2021-11-09_08-26-17
  done: false
  episode_len_mean: 98.66
  episode_media: {}
  episode_reward_max: 14.720000000000015
  episode_reward_mean: 9.124400000000017
  episode_reward_min: 2.930000000000011
  episodes_this_iter: 19
  episodes_total: 12075
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.2826514232726325
          entropy_coeff: 0.009999999999999998
          kl: 0.008412801799957771
          policy_loss: -0.031301449948833104
          total_loss: 0.08162949443573043
          vf_explained_var: 0.9817975759506226
          vf_loss: 0.11666137318880786
    num_agent_steps_sampled: 1234764
    num_agent_steps_trained: 1234764
    num_steps_sampled: 1234764
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,618,16846,1234764,9.1244,14.72,2.93,98.66




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1236762
  custom_metrics: {}
  date: 2021-11-09_08-26-58
  done: false
  episode_len_mean: 97.17
  episode_media: {}
  episode_reward_max: 14.720000000000015
  episode_reward_mean: 8.805700000000018
  episode_reward_min: -0.05
  episodes_this_iter: 22
  episodes_total: 12097
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.311692013627007
          entropy_coeff: 0.009999999999999998
          kl: 0.013338883004562391
          policy_loss: -0.010666448729378837
          total_loss: 0.22383627325651193
          vf_explained_var: 0.944995105266571
          vf_loss: 0.23319738241178647
    num_agent_steps_sampled: 1236762
    num_agent_steps_trained: 1236762
    num_steps_sampled: 1236762
    num_steps_trained: 1236762
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,619,16887.4,1236762,8.8057,14.72,-0.05,97.17


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1238760
  custom_metrics: {}
  date: 2021-11-09_08-27-23
  done: false
  episode_len_mean: 97.95
  episode_media: {}
  episode_reward_max: 14.720000000000015
  episode_reward_mean: 8.797000000000017
  episode_reward_min: -0.05
  episodes_this_iter: 20
  episodes_total: 12117
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.2803097338903517
          entropy_coeff: 0.009999999999999998
          kl: 0.008421634479631549
          policy_loss: 0.006516434500614802
          total_loss: 0.09173519755048411
          vf_explained_var: 0.9892157912254333
          vf_loss: 0.08891622464039496
    num_agent_steps_sampled: 1238760
    num_agent_steps_trained: 1238760
    num_steps_sampled: 1238760
    num_steps_trained: 1238760
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,620,16912.2,1238760,8.797,14.72,-0.05,97.95




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1240758
  custom_metrics: {}
  date: 2021-11-09_08-28-03
  done: false
  episode_len_mean: 96.87
  episode_media: {}
  episode_reward_max: 14.700000000000014
  episode_reward_mean: 8.827200000000015
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 12138
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.3354975882030669
          entropy_coeff: 0.009999999999999998
          kl: 0.010604790646963212
          policy_loss: -0.014837046952119895
          total_loss: 0.21610503113784252
          vf_explained_var: 0.9700362682342529
          vf_loss: 0.23283094751338165
    num_agent_steps_sampled: 1240758
    num_agent_steps_trained: 1240758
    num_steps_sampled: 1240758
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,621,16952,1240758,8.8272,14.7,-0.06,96.87




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1242756
  custom_metrics: {}
  date: 2021-11-09_08-28-43
  done: false
  episode_len_mean: 97.22
  episode_media: {}
  episode_reward_max: 14.700000000000012
  episode_reward_mean: 8.559200000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 12159
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.3078789955093748
          entropy_coeff: 0.009999999999999998
          kl: 0.007389390237840555
          policy_loss: -0.09472893905781564
          total_loss: 0.09937409355881668
          vf_explained_var: 0.9526451230049133
          vf_loss: 0.19919226558967715
    num_agent_steps_sampled: 1242756
    num_agent_steps_trained: 1242756
    num_steps_sampled: 1242756
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,622,16991.4,1242756,8.5592,14.7,-0.06,97.22


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1244754
  custom_metrics: {}
  date: 2021-11-09_08-29-08
  done: false
  episode_len_mean: 96.5
  episode_media: {}
  episode_reward_max: 14.700000000000012
  episode_reward_mean: 8.686100000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 12178
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.3298520508266631
          entropy_coeff: 0.009999999999999998
          kl: 0.0066219162958837275
          policy_loss: -0.0872648075222969
          total_loss: 0.01165081269684292
          vf_explained_var: 0.9840903878211975
          vf_loss: 0.10505439614256223
    num_agent_steps_sampled: 1244754
    num_agent_steps_trained: 1244754
    num_steps_sampled: 1244754
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,623,17016.3,1244754,8.6861,14.7,-0.06,96.5


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1246752
  custom_metrics: {}
  date: 2021-11-09_08-29-33
  done: false
  episode_len_mean: 97.93
  episode_media: {}
  episode_reward_max: 14.700000000000012
  episode_reward_mean: 8.804900000000018
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 12199
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.296181153115772
          entropy_coeff: 0.009999999999999998
          kl: 0.008361694609429824
          policy_loss: -0.05216062530165627
          total_loss: 0.056886499610152984
          vf_explained_var: 0.978830099105835
          vf_loss: 0.11296810796927838
    num_agent_steps_sampled: 1246752
    num_agent_steps_trained: 1246752
    num_steps_sampled: 1246752
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,624,17041.6,1246752,8.8049,14.7,-0.06,97.93


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1248750
  custom_metrics: {}
  date: 2021-11-09_08-29-58
  done: false
  episode_len_mean: 97.67
  episode_media: {}
  episode_reward_max: 14.740000000000013
  episode_reward_mean: 8.659800000000018
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 12219
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.3444948718661354
          entropy_coeff: 0.009999999999999998
          kl: 0.010329118358945902
          policy_loss: -0.0490782818978741
          total_loss: 0.0703558391580979
          vf_explained_var: 0.9760794043540955
          vf_loss: 0.12171102783509663
    num_agent_steps_sampled: 1248750
    num_agent_steps_trained: 1248750
    num_steps_sampled: 1248750
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,625,17067,1248750,8.6598,14.74,-0.06,97.67


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1250748
  custom_metrics: {}
  date: 2021-11-09_08-30-25
  done: false
  episode_len_mean: 98.46
  episode_media: {}
  episode_reward_max: 14.740000000000013
  episode_reward_mean: 8.712400000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 12239
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.2197730649085272
          entropy_coeff: 0.009999999999999998
          kl: 0.008251213137048817
          policy_loss: -0.11757340910179274
          total_loss: 0.020074249741931758
          vf_explained_var: 0.9818271994590759
          vf_loss: 0.14092401677653904
    num_agent_steps_sampled: 1250748
    num_agent_steps_trained: 1250748
    num_steps_sampled: 1250748
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,626,17093.8,1250748,8.7124,14.74,-0.06,98.46


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1252746
  custom_metrics: {}
  date: 2021-11-09_08-30-52
  done: false
  episode_len_mean: 98.1
  episode_media: {}
  episode_reward_max: 14.770000000000012
  episode_reward_mean: 9.048200000000016
  episode_reward_min: 4.180000000000023
  episodes_this_iter: 22
  episodes_total: 12261
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.2705732084455945
          entropy_coeff: 0.009999999999999998
          kl: 0.009477001941735284
          policy_loss: -0.05996169564979417
          total_loss: 0.03793432402114073
          vf_explained_var: 0.9827896952629089
          vf_loss: 0.10035503297334626
    num_agent_steps_sampled: 1252746
    num_agent_steps_trained: 1252746
    num_steps_sampled: 1252746
    num_steps_trained: 125

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,627,17120.3,1252746,9.0482,14.77,4.18,98.1


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1254744
  custom_metrics: {}
  date: 2021-11-09_08-31-19
  done: false
  episode_len_mean: 96.86
  episode_media: {}
  episode_reward_max: 14.770000000000012
  episode_reward_mean: 8.963000000000017
  episode_reward_min: 2.0400000000000147
  episodes_this_iter: 20
  episodes_total: 12281
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.2107374409834544
          entropy_coeff: 0.009999999999999998
          kl: 0.011854897315275198
          policy_loss: -0.043274852739913126
          total_loss: 0.0932677251952035
          vf_explained_var: 0.9761278629302979
          vf_loss: 0.13583220557442732
    num_agent_steps_sampled: 1254744
    num_agent_steps_trained: 1254744
    num_steps_sampled: 1254744
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,628,17147.6,1254744,8.963,14.77,2.04,96.86


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1256742
  custom_metrics: {}
  date: 2021-11-09_08-31-46
  done: false
  episode_len_mean: 94.22
  episode_media: {}
  episode_reward_max: 14.770000000000012
  episode_reward_mean: 9.519500000000019
  episode_reward_min: 2.0400000000000147
  episodes_this_iter: 24
  episodes_total: 12305
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.1830482471556891
          entropy_coeff: 0.009999999999999998
          kl: 0.009609657309736314
          policy_loss: -0.06510703024410067
          total_loss: 0.05334972892666147
          vf_explained_var: 0.9904260039329529
          vf_loss: 0.11989709147739978
    num_agent_steps_sampled: 1256742
    num_agent_steps_trained: 1256742
    num_steps_sampled: 1256742
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,629,17175,1256742,9.5195,14.77,2.04,94.22


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1258740
  custom_metrics: {}
  date: 2021-11-09_08-32-13
  done: false
  episode_len_mean: 92.81
  episode_media: {}
  episode_reward_max: 14.770000000000012
  episode_reward_mean: 9.921000000000015
  episode_reward_min: 2.0400000000000147
  episodes_this_iter: 21
  episodes_total: 12326
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.2225059287888662
          entropy_coeff: 0.009999999999999998
          kl: 0.0072212366364690465
          policy_loss: -0.006228557388697352
          total_loss: 0.0625874171122199
          vf_explained_var: 0.9909363985061646
          vf_loss: 0.07323329352019799
    num_agent_steps_sampled: 1258740
    num_agent_steps_trained: 1258740
    num_steps_sampled: 1258740
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,630,17201.5,1258740,9.921,14.77,2.04,92.81


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1260738
  custom_metrics: {}
  date: 2021-11-09_08-32-39
  done: false
  episode_len_mean: 93.08
  episode_media: {}
  episode_reward_max: 14.770000000000012
  episode_reward_mean: 9.698900000000016
  episode_reward_min: 2.0400000000000147
  episodes_this_iter: 20
  episodes_total: 12346
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.251102043049676
          entropy_coeff: 0.009999999999999998
          kl: 0.008335439516220596
          policy_loss: -0.006247666961557808
          total_loss: 0.07842084760999396
          vf_explained_var: 0.985125720500946
          vf_loss: 0.08816709423526412
    num_agent_steps_sampled: 1260738
    num_agent_steps_trained: 1260738
    num_steps_sampled: 1260738
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,631,17227.7,1260738,9.6989,14.77,2.04,93.08


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1262736
  custom_metrics: {}
  date: 2021-11-09_08-33-06
  done: false
  episode_len_mean: 94.29
  episode_media: {}
  episode_reward_max: 14.700000000000014
  episode_reward_mean: 9.503400000000017
  episode_reward_min: 2.0400000000000147
  episodes_this_iter: 22
  episodes_total: 12368
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.2203390104430063
          entropy_coeff: 0.009999999999999998
          kl: 0.008834782378908734
          policy_loss: -0.03671047580206678
          total_loss: 0.05017363276510012
          vf_explained_var: 0.9901627898216248
          vf_loss: 0.08953515872181882
    num_agent_steps_sampled: 1262736
    num_agent_steps_trained: 1262736
    num_steps_sampled: 1262736
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,632,17254.6,1262736,9.5034,14.7,2.04,94.29


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1264734
  custom_metrics: {}
  date: 2021-11-09_08-33-32
  done: false
  episode_len_mean: 95.58
  episode_media: {}
  episode_reward_max: 14.690000000000014
  episode_reward_mean: 9.564200000000017
  episode_reward_min: 2.6000000000000165
  episodes_this_iter: 19
  episodes_total: 12387
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.2959547559420268
          entropy_coeff: 0.009999999999999998
          kl: 0.007663109496478844
          policy_loss: -0.019962161300437792
          total_loss: 0.07507175933126183
          vf_explained_var: 0.987800657749176
          vf_loss: 0.09970796217343637
    num_agent_steps_sampled: 1264734
    num_agent_steps_trained: 1264734
    num_steps_sampled: 1264734
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,633,17280.7,1264734,9.5642,14.69,2.6,95.58


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1266732
  custom_metrics: {}
  date: 2021-11-09_08-33-58
  done: false
  episode_len_mean: 96.97
  episode_media: {}
  episode_reward_max: 14.670000000000014
  episode_reward_mean: 9.173700000000018
  episode_reward_min: 2.6000000000000165
  episodes_this_iter: 20
  episodes_total: 12407
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.286963263012114
          entropy_coeff: 0.009999999999999998
          kl: 0.011262219375509686
          policy_loss: -0.0187643520179249
          total_loss: 0.09475278306220258
          vf_explained_var: 0.9822729229927063
          vf_loss: 0.11420983824701536
    num_agent_steps_sampled: 1266732
    num_agent_steps_trained: 1266732
    num_steps_sampled: 1266732
    num_steps_trained: 126

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,634,17306.7,1266732,9.1737,14.67,2.6,96.97


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1268730
  custom_metrics: {}
  date: 2021-11-09_08-34-23
  done: false
  episode_len_mean: 98.96
  episode_media: {}
  episode_reward_max: 14.670000000000014
  episode_reward_mean: 9.163800000000016
  episode_reward_min: 2.6000000000000165
  episodes_this_iter: 19
  episodes_total: 12426
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.35548472234181
          entropy_coeff: 0.009999999999999998
          kl: 0.009379483093931707
          policy_loss: -0.024192883322636288
          total_loss: 0.12640542827014412
          vf_explained_var: 0.9783480763435364
          vf_loss: 0.15401187783905437
    num_agent_steps_sampled: 1268730
    num_agent_steps_trained: 1268730
    num_steps_sampled: 1268730
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,635,17331.5,1268730,9.1638,14.67,2.6,98.96




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1270728
  custom_metrics: {}
  date: 2021-11-09_08-35-06
  done: false
  episode_len_mean: 98.14
  episode_media: {}
  episode_reward_max: 14.720000000000011
  episode_reward_mean: 9.224700000000018
  episode_reward_min: -0.15000000000000002
  episodes_this_iter: 22
  episodes_total: 12448
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.2135692590758913
          entropy_coeff: 0.009999999999999998
          kl: 0.008652762782309273
          policy_loss: -0.014720471895166806
          total_loss: 0.20391237408898416
          vf_explained_var: 0.9739631414413452
          vf_loss: 0.22141300138263476
    num_agent_steps_sampled: 1270728
    num_agent_steps_trained: 1270728
    num_steps_sampled: 1270728
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,636,17374.3,1270728,9.2247,14.72,-0.15,98.14


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1272726
  custom_metrics: {}
  date: 2021-11-09_08-35-33
  done: false
  episode_len_mean: 98.24
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 9.06450000000002
  episode_reward_min: -0.15000000000000002
  episodes_this_iter: 21
  episodes_total: 12469
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.2926972758202326
          entropy_coeff: 0.009999999999999998
          kl: 0.009981622814344287
          policy_loss: -0.025238562197912307
          total_loss: 0.12895993107841128
          vf_explained_var: 0.9810957908630371
          vf_loss: 0.15633314177393914
    num_agent_steps_sampled: 1272726
    num_agent_steps_trained: 1272726
    num_steps_sampled: 1272726
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,637,17401.5,1272726,9.0645,14.72,-0.15,98.24




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1274724
  custom_metrics: {}
  date: 2021-11-09_08-36-15
  done: false
  episode_len_mean: 95.17
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 9.145800000000017
  episode_reward_min: -0.15000000000000002
  episodes_this_iter: 22
  episodes_total: 12491
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.1479822783243088
          entropy_coeff: 0.009999999999999998
          kl: 0.010570059855658153
          policy_loss: -0.044234084036378636
          total_loss: 0.19491075525681179
          vf_explained_var: 0.9726147055625916
          vf_loss: 0.23919611257456597
    num_agent_steps_sampled: 1274724
    num_agent_steps_trained: 1274724
    num_steps_sampled: 1274724
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,638,17443.4,1274724,9.1458,14.72,-0.15,95.17




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1276722
  custom_metrics: {}
  date: 2021-11-09_08-36-56
  done: false
  episode_len_mean: 95.06
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 8.753000000000016
  episode_reward_min: -0.15000000000000002
  episodes_this_iter: 21
  episodes_total: 12512
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.2694011120569137
          entropy_coeff: 0.009999999999999998
          kl: 0.01320512384681715
          policy_loss: -0.042826520491923606
          total_loss: 0.19273785439408606
          vf_explained_var: 0.9646519422531128
          vf_loss: 0.23398074557383855
    num_agent_steps_sampled: 1276722
    num_agent_steps_trained: 1276722
    num_steps_sampled: 1276722
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,639,17484.5,1276722,8.753,14.72,-0.15,95.06


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1278720
  custom_metrics: {}
  date: 2021-11-09_08-37-22
  done: false
  episode_len_mean: 93.93
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 8.804500000000019
  episode_reward_min: -0.15000000000000002
  episodes_this_iter: 21
  episodes_total: 12533
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.1098250846068065
          entropy_coeff: 0.009999999999999998
          kl: 0.0076403499347162855
          policy_loss: -0.11313867144996212
          total_loss: -0.01605150732433512
          vf_explained_var: 0.9839546084403992
          vf_loss: 0.09992451896624906
    num_agent_steps_sampled: 1278720
    num_agent_steps_trained: 1278720
    num_steps_sampled: 1278720
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,640,17510.5,1278720,8.8045,14.72,-0.15,93.93


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1280718
  custom_metrics: {}
  date: 2021-11-09_08-37-48
  done: false
  episode_len_mean: 94.56
  episode_media: {}
  episode_reward_max: 14.700000000000014
  episode_reward_mean: 8.904900000000016
  episode_reward_min: -0.05
  episodes_this_iter: 20
  episodes_total: 12553
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.2397700113909584
          entropy_coeff: 0.009999999999999998
          kl: 0.008831233734096942
          policy_loss: -0.046813892821470894
          total_loss: 0.06021699877012344
          vf_explained_var: 0.986810564994812
          vf_loss: 0.10988009002591882
    num_agent_steps_sampled: 1280718
    num_agent_steps_trained: 1280718
    num_steps_sampled: 1280718
    num_steps_trained: 1280718
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,641,17536,1280718,8.9049,14.7,-0.05,94.56


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1282716
  custom_metrics: {}
  date: 2021-11-09_08-38-15
  done: false
  episode_len_mean: 95.08
  episode_media: {}
  episode_reward_max: 14.700000000000014
  episode_reward_mean: 8.998800000000017
  episode_reward_min: -0.05
  episodes_this_iter: 21
  episodes_total: 12574
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.190911754256203
          entropy_coeff: 0.009999999999999998
          kl: 0.008025322007527902
          policy_loss: -0.011068396944375265
          total_loss: 0.051031778987851883
          vf_explained_var: 0.99098801612854
          vf_loss: 0.06533215910728489
    num_agent_steps_sampled: 1282716
    num_agent_steps_trained: 1282716
    num_steps_sampled: 1282716
    num_steps_trained: 1282716
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,642,17562.9,1282716,8.9988,14.7,-0.05,95.08


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1284714
  custom_metrics: {}
  date: 2021-11-09_08-38-41
  done: false
  episode_len_mean: 97.76
  episode_media: {}
  episode_reward_max: 14.690000000000014
  episode_reward_mean: 8.947400000000016
  episode_reward_min: 3.9100000000000255
  episodes_this_iter: 21
  episodes_total: 12595
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.1703751481714704
          entropy_coeff: 0.009999999999999998
          kl: 0.010164446269404633
          policy_loss: -0.006511962271872021
          total_loss: 0.10581306834660825
          vf_explained_var: 0.9859546422958374
          vf_loss: 0.11303878376881281
    num_agent_steps_sampled: 1284714
    num_agent_steps_trained: 1284714
    num_steps_sampled: 1284714
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,643,17589.2,1284714,8.9474,14.69,3.91,97.76


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1286712
  custom_metrics: {}
  date: 2021-11-09_08-39-08
  done: false
  episode_len_mean: 96.13
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 9.362400000000017
  episode_reward_min: 3.9100000000000255
  episodes_this_iter: 21
  episodes_total: 12616
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.21723207008271
          entropy_coeff: 0.009999999999999998
          kl: 0.006452303521016946
          policy_loss: -0.09270382483622858
          total_loss: -0.053758642210492064
          vf_explained_var: 0.9956234097480774
          vf_loss: 0.044141146837778034
    num_agent_steps_sampled: 1286712
    num_agent_steps_trained: 1286712
    num_steps_sampled: 1286712
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,644,17615.7,1286712,9.3624,14.71,3.91,96.13


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1288710
  custom_metrics: {}
  date: 2021-11-09_08-39-34
  done: false
  episode_len_mean: 96.57
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 9.495600000000017
  episode_reward_min: 4.330000000000021
  episodes_this_iter: 19
  episodes_total: 12635
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.261260790768124
          entropy_coeff: 0.009999999999999998
          kl: 0.007981061084907886
          policy_loss: -0.03602490520903042
          total_loss: 0.035203607309432255
          vf_explained_var: 0.9862095713615417
          vf_loss: 0.0752118424202005
    num_agent_steps_sampled: 1288710
    num_agent_steps_trained: 1288710
    num_steps_sampled: 1288710
    num_steps_trained: 128

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,645,17642.6,1288710,9.4956,14.71,4.33,96.57


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1290708
  custom_metrics: {}
  date: 2021-11-09_08-40-01
  done: false
  episode_len_mean: 96.41
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 9.376100000000015
  episode_reward_min: 4.330000000000021
  episodes_this_iter: 22
  episodes_total: 12657
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.2403569919722421
          entropy_coeff: 0.009999999999999998
          kl: 0.012365096689341604
          policy_loss: -0.002159638206164042
          total_loss: 0.062094802444889435
          vf_explained_var: 0.990706741809845
          vf_loss: 0.06328862622557652
    num_agent_steps_sampled: 1290708
    num_agent_steps_trained: 1290708
    num_steps_sampled: 1290708
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,646,17669.1,1290708,9.3761,14.71,4.33,96.41


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1292706
  custom_metrics: {}
  date: 2021-11-09_08-40-28
  done: false
  episode_len_mean: 96.1
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 9.432400000000017
  episode_reward_min: 4.330000000000021
  episodes_this_iter: 21
  episodes_total: 12678
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.1249242158163162
          entropy_coeff: 0.009999999999999998
          kl: 0.010793706915283608
          policy_loss: -0.023923201264724844
          total_loss: 0.05356610369469438
          vf_explained_var: 0.9836084246635437
          vf_loss: 0.07706817920275387
    num_agent_steps_sampled: 1292706
    num_agent_steps_trained: 1292706
    num_steps_sampled: 1292706
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,647,17695.8,1292706,9.4324,14.71,4.33,96.1


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1294704
  custom_metrics: {}
  date: 2021-11-09_08-40-53
  done: false
  episode_len_mean: 96.91
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 9.427700000000018
  episode_reward_min: 3.910000000000025
  episodes_this_iter: 20
  episodes_total: 12698
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.2907880028088887
          entropy_coeff: 0.009999999999999998
          kl: 0.007723855578735686
          policy_loss: -0.022666135980259804
          total_loss: 0.04298130479153423
          vf_explained_var: 0.9864258766174316
          vf_loss: 0.07020413687541371
    num_agent_steps_sampled: 1294704
    num_agent_steps_trained: 1294704
    num_steps_sampled: 1294704
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,648,17721,1294704,9.4277,14.71,3.91,96.91


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1296702
  custom_metrics: {}
  date: 2021-11-09_08-41-20
  done: false
  episode_len_mean: 97.39
  episode_media: {}
  episode_reward_max: 14.690000000000014
  episode_reward_mean: 9.327300000000019
  episode_reward_min: 3.6700000000000266
  episodes_this_iter: 20
  episodes_total: 12718
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.2250310179733095
          entropy_coeff: 0.009999999999999998
          kl: 0.008363750548962283
          policy_loss: -0.010835238155864534
          total_loss: 0.07507364881180581
          vf_explained_var: 0.9895300269126892
          vf_loss: 0.08911614694765636
    num_agent_steps_sampled: 1296702
    num_agent_steps_trained: 1296702
    num_steps_sampled: 1296702
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,649,17747.7,1296702,9.3273,14.69,3.67,97.39


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1298700
  custom_metrics: {}
  date: 2021-11-09_08-41-46
  done: false
  episode_len_mean: 96.61
  episode_media: {}
  episode_reward_max: 14.810000000000015
  episode_reward_mean: 9.363600000000018
  episode_reward_min: 3.6700000000000266
  episodes_this_iter: 21
  episodes_total: 12739
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.1528286894162496
          entropy_coeff: 0.009999999999999998
          kl: 0.006842658997132297
          policy_loss: -0.09493403202366262
          total_loss: -0.03751039652242547
          vf_explained_var: 0.9935289025306702
          vf_loss: 0.061553505364628065
    num_agent_steps_sampled: 1298700
    num_agent_steps_trained: 1298700
    num_steps_sampled: 1298700
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,650,17774.1,1298700,9.3636,14.81,3.67,96.61


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1300698
  custom_metrics: {}
  date: 2021-11-09_08-42-14
  done: false
  episode_len_mean: 95.52
  episode_media: {}
  episode_reward_max: 14.810000000000015
  episode_reward_mean: 9.779300000000017
  episode_reward_min: 3.6700000000000266
  episodes_this_iter: 22
  episodes_total: 12761
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.1736947999114082
          entropy_coeff: 0.009999999999999998
          kl: 0.008339263478537194
          policy_loss: -0.03534576312771865
          total_loss: 0.049221687763929364
          vf_explained_var: 0.9916967749595642
          vf_loss: 0.08728782530164435
    num_agent_steps_sampled: 1300698
    num_agent_steps_trained: 1300698
    num_steps_sampled: 1300698
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,651,17801.6,1300698,9.7793,14.81,3.67,95.52


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1302696
  custom_metrics: {}
  date: 2021-11-09_08-42-40
  done: false
  episode_len_mean: 96.8
  episode_media: {}
  episode_reward_max: 14.810000000000015
  episode_reward_mean: 9.54590000000002
  episode_reward_min: 3.6700000000000266
  episodes_this_iter: 21
  episodes_total: 12782
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.2212655640783765
          entropy_coeff: 0.009999999999999998
          kl: 0.009448861632094125
          policy_loss: -0.00847012106151808
          total_loss: 0.07940345324043717
          vf_explained_var: 0.9870811104774475
          vf_loss: 0.08986993974872998
    num_agent_steps_sampled: 1302696
    num_agent_steps_trained: 1302696
    num_steps_sampled: 1302696
    num_steps_trained: 130

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,652,17827.4,1302696,9.5459,14.81,3.67,96.8




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1304694
  custom_metrics: {}
  date: 2021-11-09_08-43-20
  done: false
  episode_len_mean: 96.19
  episode_media: {}
  episode_reward_max: 14.810000000000015
  episode_reward_mean: 9.450200000000018
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 12801
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.3232117352031527
          entropy_coeff: 0.009999999999999998
          kl: 0.005881713325121277
          policy_loss: -0.08926365006537665
          total_loss: -0.010836234059007395
          vf_explained_var: 0.9785871505737305
          vf_loss: 0.0853001119125457
    num_agent_steps_sampled: 1304694
    num_agent_steps_trained: 1304694
    num_steps_sampled: 1304694
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,653,17867.7,1304694,9.4502,14.81,-0.06,96.19


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1306692
  custom_metrics: {}
  date: 2021-11-09_08-43-47
  done: false
  episode_len_mean: 96.08
  episode_media: {}
  episode_reward_max: 14.91000000000001
  episode_reward_mean: 9.538500000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 12822
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.2720087681497847
          entropy_coeff: 0.009999999999999998
          kl: 0.00741449143897124
          policy_loss: -0.07380073906055519
          total_loss: -0.040260686796335945
          vf_explained_var: 0.9900240898132324
          vf_loss: 0.03824344729738576
    num_agent_steps_sampled: 1306692
    num_agent_steps_trained: 1306692
    num_steps_sampled: 1306692
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,654,17894.7,1306692,9.5385,14.91,-0.06,96.08




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1308690
  custom_metrics: {}
  date: 2021-11-09_08-44-26
  done: false
  episode_len_mean: 95.47
  episode_media: {}
  episode_reward_max: 14.91000000000001
  episode_reward_mean: 9.097500000000018
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 12843
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.266231746332986
          entropy_coeff: 0.009999999999999998
          kl: 0.008014454130963324
          policy_loss: -0.024224236394677842
          total_loss: 0.08861018015692632
          vf_explained_var: 0.9852057099342346
          vf_loss: 0.11683135044184469
    num_agent_steps_sampled: 1308690
    num_agent_steps_trained: 1308690
    num_steps_sampled: 1308690
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,655,17934.1,1308690,9.0975,14.91,-0.06,95.47




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1310688
  custom_metrics: {}
  date: 2021-11-09_08-45-07
  done: false
  episode_len_mean: 96.64
  episode_media: {}
  episode_reward_max: 14.91000000000001
  episode_reward_mean: 8.745500000000018
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 12864
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.2231091777483623
          entropy_coeff: 0.009999999999999998
          kl: 0.009674737107475918
          policy_loss: -0.030291850297223952
          total_loss: 0.01824868034039225
          vf_explained_var: 0.9923801422119141
          vf_loss: 0.050311109210763656
    num_agent_steps_sampled: 1310688
    num_agent_steps_trained: 1310688
    num_steps_sampled: 1310688
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,656,17974.6,1310688,8.7455,14.91,-0.06,96.64


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1312686
  custom_metrics: {}
  date: 2021-11-09_08-45-33
  done: false
  episode_len_mean: 97.88
  episode_media: {}
  episode_reward_max: 14.91000000000001
  episode_reward_mean: 8.531600000000019
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 12883
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.2603014963013786
          entropy_coeff: 0.009999999999999998
          kl: 0.0100103868016515
          policy_loss: -0.04835105804460389
          total_loss: 0.009021919894786108
          vf_explained_var: 0.9913392663002014
          vf_loss: 0.059152568362298465
    num_agent_steps_sampled: 1312686
    num_agent_steps_trained: 1312686
    num_steps_sampled: 1312686
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,657,18000.9,1312686,8.5316,14.91,-0.06,97.88


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1314684
  custom_metrics: {}
  date: 2021-11-09_08-45-59
  done: false
  episode_len_mean: 97.14
  episode_media: {}
  episode_reward_max: 14.91000000000001
  episode_reward_mean: 8.99320000000002
  episode_reward_min: -0.04
  episodes_this_iter: 21
  episodes_total: 12904
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.1985279812699272
          entropy_coeff: 0.009999999999999998
          kl: 0.006029998791266465
          policy_loss: -0.0782765228833471
          total_loss: -0.054671440104998296
          vf_explained_var: 0.9958929419517517
          vf_loss: 0.029070610400023206
    num_agent_steps_sampled: 1314684
    num_agent_steps_trained: 1314684
    num_steps_sampled: 1314684
    num_steps_trained: 1314684
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,658,18026.8,1314684,8.9932,14.91,-0.04,97.14


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1316682
  custom_metrics: {}
  date: 2021-11-09_08-46-26
  done: false
  episode_len_mean: 97.72
  episode_media: {}
  episode_reward_max: 14.730000000000011
  episode_reward_mean: 8.782100000000018
  episode_reward_min: -0.04
  episodes_this_iter: 20
  episodes_total: 12924
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0812194824218753
          cur_lr: 5.000000000000001e-05
          entropy: 1.283687971319471
          entropy_coeff: 0.009999999999999998
          kl: 0.02213853888992628
          policy_loss: 0.02219343957092081
          total_loss: 0.19020061425509907
          vf_explained_var: 0.9804750084877014
          vf_loss: 0.15690743662416934
    num_agent_steps_sampled: 1316682
    num_agent_steps_trained: 1316682
    num_steps_sampled: 1316682
    num_steps_trained: 1316682
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,659,18054,1316682,8.7821,14.73,-0.04,97.72


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1318680
  custom_metrics: {}
  date: 2021-11-09_08-46-54
  done: false
  episode_len_mean: 98.81
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 9.105600000000019
  episode_reward_min: 3.3400000000000247
  episodes_this_iter: 20
  episodes_total: 12944
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.621829223632812
          cur_lr: 5.000000000000001e-05
          entropy: 1.217301338627225
          entropy_coeff: 0.009999999999999998
          kl: 0.006622773297336591
          policy_loss: -0.034176618641331084
          total_loss: 0.05959947474655651
          vf_explained_var: 0.9819309711456299
          vf_loss: 0.09520809856199083
    num_agent_steps_sampled: 1318680
    num_agent_steps_trained: 1318680
    num_steps_sampled: 1318680
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,660,18081.4,1318680,9.1056,14.72,3.34,98.81


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1320678
  custom_metrics: {}
  date: 2021-11-09_08-47-20
  done: false
  episode_len_mean: 99.39
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 9.048500000000018
  episode_reward_min: 2.500000000000025
  episodes_this_iter: 20
  episodes_total: 12964
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.621829223632812
          cur_lr: 5.000000000000001e-05
          entropy: 1.2488238760403225
          entropy_coeff: 0.009999999999999998
          kl: 0.007382721854620309
          policy_loss: -0.02345167566977796
          total_loss: 0.05038989078076113
          vf_explained_var: 0.9833713173866272
          vf_loss: 0.07435629074240015
    num_agent_steps_sampled: 1320678
    num_agent_steps_trained: 1320678
    num_steps_sampled: 1320678
    num_steps_trained: 132

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,661,18107.1,1320678,9.0485,14.72,2.5,99.39


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1322676
  custom_metrics: {}
  date: 2021-11-09_08-47-46
  done: false
  episode_len_mean: 99.42
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 9.12080000000002
  episode_reward_min: 2.500000000000025
  episodes_this_iter: 20
  episodes_total: 12984
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.621829223632812
          cur_lr: 5.000000000000001e-05
          entropy: 1.3000020390465146
          entropy_coeff: 0.009999999999999998
          kl: 0.00687733346060433
          policy_loss: -0.021917596388430823
          total_loss: 0.04478064484539486
          vf_explained_var: 0.9868665337562561
          vf_loss: 0.06854440006649211
    num_agent_steps_sampled: 1322676
    num_agent_steps_trained: 1322676
    num_steps_sampled: 1322676
    num_steps_trained: 1322

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,662,18133.3,1322676,9.1208,14.72,2.5,99.42


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1324674
  custom_metrics: {}
  date: 2021-11-09_08-48-12
  done: false
  episode_len_mean: 100.76
  episode_media: {}
  episode_reward_max: 14.730000000000013
  episode_reward_mean: 9.039500000000016
  episode_reward_min: 2.500000000000025
  episodes_this_iter: 19
  episodes_total: 13003
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.621829223632812
          cur_lr: 5.000000000000001e-05
          entropy: 1.309872573330289
          entropy_coeff: 0.009999999999999998
          kl: 0.007036717105510712
          policy_loss: -0.03862335140417729
          total_loss: 0.03677923939235154
          vf_explained_var: 0.9878404140472412
          vf_loss: 0.07708896449101822
    num_agent_steps_sampled: 1324674
    num_agent_steps_trained: 1324674
    num_steps_sampled: 1324674
    num_steps_trained: 132

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,663,18159.4,1324674,9.0395,14.73,2.5,100.76


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1326672
  custom_metrics: {}
  date: 2021-11-09_08-48-37
  done: false
  episode_len_mean: 101.14
  episode_media: {}
  episode_reward_max: 14.730000000000013
  episode_reward_mean: 9.019100000000018
  episode_reward_min: 2.500000000000025
  episodes_this_iter: 20
  episodes_total: 13023
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.621829223632812
          cur_lr: 5.000000000000001e-05
          entropy: 1.3106338330677578
          entropy_coeff: 0.009999999999999998
          kl: 0.00785424343105336
          policy_loss: -0.013927834232648213
          total_loss: 0.09010957616070907
          vf_explained_var: 0.9777507781982422
          vf_loss: 0.10440550914832523
    num_agent_steps_sampled: 1326672
    num_agent_steps_trained: 1326672
    num_steps_sampled: 1326672
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,664,18184.8,1326672,9.0191,14.73,2.5,101.14


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1328670
  custom_metrics: {}
  date: 2021-11-09_08-49-03
  done: false
  episode_len_mean: 100.99
  episode_media: {}
  episode_reward_max: 14.730000000000013
  episode_reward_mean: 9.110300000000018
  episode_reward_min: 2.500000000000025
  episodes_this_iter: 21
  episodes_total: 13044
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.621829223632812
          cur_lr: 5.000000000000001e-05
          entropy: 1.2326831601914905
          entropy_coeff: 0.009999999999999998
          kl: 0.005925884406240395
          policy_loss: -0.0052621138947350635
          total_loss: 0.052121062461464175
          vf_explained_var: 0.9938401579856873
          vf_loss: 0.060099235416523046
    num_agent_steps_sampled: 1328670
    num_agent_steps_trained: 1328670
    num_steps_sampled: 1328670
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,665,18210.7,1328670,9.1103,14.73,2.5,100.99


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1330668
  custom_metrics: {}
  date: 2021-11-09_08-49-27
  done: false
  episode_len_mean: 101.35
  episode_media: {}
  episode_reward_max: 14.730000000000013
  episode_reward_mean: 8.97250000000002
  episode_reward_min: 3.150000000000014
  episodes_this_iter: 18
  episodes_total: 13062
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.621829223632812
          cur_lr: 5.000000000000001e-05
          entropy: 1.3993623994645619
          entropy_coeff: 0.009999999999999998
          kl: 0.0073951318938820215
          policy_loss: -0.05562752818777448
          total_loss: 0.015461691021032276
          vf_explained_var: 0.9811099171638489
          vf_loss: 0.07308919964624302
    num_agent_steps_sampled: 1330668
    num_agent_steps_trained: 1330668
    num_steps_sampled: 1330668
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,666,18234.1,1330668,8.9725,14.73,3.15,101.35


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1332666
  custom_metrics: {}
  date: 2021-11-09_08-49-52
  done: false
  episode_len_mean: 101.49
  episode_media: {}
  episode_reward_max: 14.730000000000013
  episode_reward_mean: 8.793000000000019
  episode_reward_min: 3.150000000000014
  episodes_this_iter: 20
  episodes_total: 13082
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.621829223632812
          cur_lr: 5.000000000000001e-05
          entropy: 1.3236934281530834
          entropy_coeff: 0.009999999999999998
          kl: 0.006821052917090314
          policy_loss: -0.016592174999061085
          total_loss: 0.05053114593029022
          vf_explained_var: 0.988635241985321
          vf_loss: 0.06929767470629443
    num_agent_steps_sampled: 1332666
    num_agent_steps_trained: 1332666
    num_steps_sampled: 1332666
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,667,18259.2,1332666,8.793,14.73,3.15,101.49


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1334664
  custom_metrics: {}
  date: 2021-11-09_08-50-17
  done: false
  episode_len_mean: 102.04
  episode_media: {}
  episode_reward_max: 14.680000000000014
  episode_reward_mean: 8.646800000000018
  episode_reward_min: 3.150000000000014
  episodes_this_iter: 20
  episodes_total: 13102
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.621829223632812
          cur_lr: 5.000000000000001e-05
          entropy: 1.3411377725147067
          entropy_coeff: 0.009999999999999998
          kl: 0.011304285371534713
          policy_loss: -0.04991903024769965
          total_loss: 0.07972798455683958
          vf_explained_var: 0.9810722470283508
          vf_loss: 0.12472477262573582
    num_agent_steps_sampled: 1334664
    num_agent_steps_trained: 1334664
    num_steps_sampled: 1334664
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,668,18284.3,1334664,8.6468,14.68,3.15,102.04


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1336662
  custom_metrics: {}
  date: 2021-11-09_08-50-41
  done: false
  episode_len_mean: 102.79
  episode_media: {}
  episode_reward_max: 14.820000000000011
  episode_reward_mean: 8.570500000000019
  episode_reward_min: 3.150000000000014
  episodes_this_iter: 18
  episodes_total: 13120
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.621829223632812
          cur_lr: 5.000000000000001e-05
          entropy: 1.2255451611110142
          entropy_coeff: 0.009999999999999998
          kl: 0.006526733071534407
          policy_loss: -0.08858051839328948
          total_loss: -0.02819291190022514
          vf_explained_var: 0.9873955845832825
          vf_loss: 0.06205781056944813
    num_agent_steps_sampled: 1336662
    num_agent_steps_trained: 1336662
    num_steps_sampled: 1336662
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,669,18308.2,1336662,8.5705,14.82,3.15,102.79


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1338660
  custom_metrics: {}
  date: 2021-11-09_08-51-06
  done: false
  episode_len_mean: 102.83
  episode_media: {}
  episode_reward_max: 14.820000000000011
  episode_reward_mean: 8.415800000000019
  episode_reward_min: 3.150000000000014
  episodes_this_iter: 20
  episodes_total: 13140
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.621829223632812
          cur_lr: 5.000000000000001e-05
          entropy: 1.3322919340360733
          entropy_coeff: 0.009999999999999998
          kl: 0.007925814567907051
          policy_loss: -0.01688239292374679
          total_loss: 0.11659631439085517
          vf_explained_var: 0.9841570258140564
          vf_loss: 0.13394730704880897
    num_agent_steps_sampled: 1338660
    num_agent_steps_trained: 1338660
    num_steps_sampled: 1338660
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,670,18333.4,1338660,8.4158,14.82,3.15,102.83


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1340658
  custom_metrics: {}
  date: 2021-11-09_08-51-33
  done: false
  episode_len_mean: 101.02
  episode_media: {}
  episode_reward_max: 14.820000000000011
  episode_reward_mean: 8.661200000000019
  episode_reward_min: 3.7300000000000244
  episodes_this_iter: 21
  episodes_total: 13161
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.621829223632812
          cur_lr: 5.000000000000001e-05
          entropy: 1.181570570525669
          entropy_coeff: 0.009999999999999998
          kl: 0.006885744160275978
          policy_loss: -0.050683824629301114
          total_loss: 0.042643861135556585
          vf_explained_var: 0.9860342741012573
          vf_loss: 0.0939758895630283
    num_agent_steps_sampled: 1340658
    num_agent_steps_trained: 1340658
    num_steps_sampled: 1340658
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,671,18360.1,1340658,8.6612,14.82,3.73,101.02




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1342656
  custom_metrics: {}
  date: 2021-11-09_08-52-15
  done: false
  episode_len_mean: 99.51
  episode_media: {}
  episode_reward_max: 14.820000000000011
  episode_reward_mean: 9.065400000000016
  episode_reward_min: 3.7300000000000244
  episodes_this_iter: 22
  episodes_total: 13183
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.621829223632812
          cur_lr: 5.000000000000001e-05
          entropy: 1.2292774699983142
          entropy_coeff: 0.009999999999999998
          kl: 0.006423010271734597
          policy_loss: 0.0008458766908872696
          total_loss: 0.07683403760283476
          vf_explained_var: 0.9886442422866821
          vf_loss: 0.07786391145505366
    num_agent_steps_sampled: 1342656
    num_agent_steps_trained: 1342656
    num_steps_sampled: 1342656
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,672,18401.8,1342656,9.0654,14.82,3.73,99.51




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1344654
  custom_metrics: {}
  date: 2021-11-09_08-52-55
  done: false
  episode_len_mean: 96.97
  episode_media: {}
  episode_reward_max: 14.820000000000011
  episode_reward_mean: 9.410900000000018
  episode_reward_min: -0.03
  episodes_this_iter: 22
  episodes_total: 13205
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.621829223632812
          cur_lr: 5.000000000000001e-05
          entropy: 1.1719917490368799
          entropy_coeff: 0.009999999999999998
          kl: 0.005535530451353335
          policy_loss: -0.03552097033132755
          total_loss: 0.0471725614386655
          vf_explained_var: 0.9876457452774048
          vf_loss: 0.08543576400372244
    num_agent_steps_sampled: 1344654
    num_agent_steps_trained: 1344654
    num_steps_sampled: 1344654
    num_steps_trained: 1344654
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,673,18442,1344654,9.4109,14.82,-0.03,96.97




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1346652
  custom_metrics: {}
  date: 2021-11-09_08-53-38
  done: false
  episode_len_mean: 92.56
  episode_media: {}
  episode_reward_max: 14.780000000000012
  episode_reward_mean: 9.699200000000015
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 23
  episodes_total: 13228
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.621829223632812
          cur_lr: 5.000000000000001e-05
          entropy: 1.0941498617331187
          entropy_coeff: 0.009999999999999998
          kl: 0.006146314835217052
          policy_loss: -0.043700874880665826
          total_loss: 0.08256608984832253
          vf_explained_var: 0.98946613073349
          vf_loss: 0.1272401882424241
    num_agent_steps_sampled: 1346652
    num_agent_steps_trained: 1346652
    num_steps_sampled: 1346652
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,674,18484.9,1346652,9.6992,14.78,-0.06,92.56


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1348650
  custom_metrics: {}
  date: 2021-11-09_08-54-05
  done: false
  episode_len_mean: 91.69
  episode_media: {}
  episode_reward_max: 14.780000000000012
  episode_reward_mean: 9.886700000000015
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 13249
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.621829223632812
          cur_lr: 5.000000000000001e-05
          entropy: 1.204881107239496
          entropy_coeff: 0.009999999999999998
          kl: 0.0051269311498880685
          policy_loss: -0.004397079117950939
          total_loss: 0.053660883541618076
          vf_explained_var: 0.9896718263626099
          vf_loss: 0.06179176568541499
    num_agent_steps_sampled: 1348650
    num_agent_steps_trained: 1348650
    num_steps_sampled: 1348650
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,675,18512.1,1348650,9.8867,14.78,-0.06,91.69


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1350648
  custom_metrics: {}
  date: 2021-11-09_08-54-32
  done: false
  episode_len_mean: 91.56
  episode_media: {}
  episode_reward_max: 14.780000000000012
  episode_reward_mean: 10.130400000000016
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 13269
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.621829223632812
          cur_lr: 5.000000000000001e-05
          entropy: 1.172662498553594
          entropy_coeff: 0.009999999999999998
          kl: 0.006093419790985352
          policy_loss: -0.06490945321995588
          total_loss: 0.008977502061142808
          vf_explained_var: 0.9918490648269653
          vf_loss: 0.07573109403075207
    num_agent_steps_sampled: 1350648
    num_agent_steps_trained: 1350648
    num_steps_sampled: 1350648
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,676,18539.1,1350648,10.1304,14.78,-0.06,91.56


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1352646
  custom_metrics: {}
  date: 2021-11-09_08-54-59
  done: false
  episode_len_mean: 92.15
  episode_media: {}
  episode_reward_max: 14.740000000000013
  episode_reward_mean: 10.044500000000015
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 23
  episodes_total: 13292
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.621829223632812
          cur_lr: 5.000000000000001e-05
          entropy: 1.2305462769099644
          entropy_coeff: 0.009999999999999998
          kl: 0.008798772155506064
          policy_loss: -0.02353605663492566
          total_loss: 0.07284996682955396
          vf_explained_var: 0.9849663376808167
          vf_loss: 0.09442138138803698
    num_agent_steps_sampled: 1352646
    num_agent_steps_trained: 1352646
    num_steps_sampled: 1352646
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,677,18566.2,1352646,10.0445,14.74,-0.06,92.15


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1354644
  custom_metrics: {}
  date: 2021-11-09_08-55-25
  done: false
  episode_len_mean: 93.87
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 9.603800000000017
  episode_reward_min: 4.6600000000000215
  episodes_this_iter: 21
  episodes_total: 13313
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.621829223632812
          cur_lr: 5.000000000000001e-05
          entropy: 1.263250206198011
          entropy_coeff: 0.009999999999999998
          kl: 0.005873614222542295
          policy_loss: -0.04307774069408576
          total_loss: 0.01539721806489286
          vf_explained_var: 0.9898597598075867
          vf_loss: 0.061581462022981474
    num_agent_steps_sampled: 1354644
    num_agent_steps_trained: 1354644
    num_steps_sampled: 1354644
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,678,18591.9,1354644,9.6038,14.72,4.66,93.87


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1356642
  custom_metrics: {}
  date: 2021-11-09_08-55-51
  done: false
  episode_len_mean: 95.75
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 9.293300000000016
  episode_reward_min: 4.340000000000023
  episodes_this_iter: 20
  episodes_total: 13333
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.621829223632812
          cur_lr: 5.000000000000001e-05
          entropy: 1.2813897961661929
          entropy_coeff: 0.009999999999999998
          kl: 0.0059628203831454335
          policy_loss: -0.08009058402052947
          total_loss: 0.003362820865142913
          vf_explained_var: 0.9872331619262695
          vf_loss: 0.08659662575948807
    num_agent_steps_sampled: 1356642
    num_agent_steps_trained: 1356642
    num_steps_sampled: 1356642
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,679,18617.6,1356642,9.2933,14.72,4.34,95.75


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1358640
  custom_metrics: {}
  date: 2021-11-09_08-56-15
  done: false
  episode_len_mean: 96.66
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 9.140600000000017
  episode_reward_min: 4.340000000000023
  episodes_this_iter: 20
  episodes_total: 13353
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.621829223632812
          cur_lr: 5.000000000000001e-05
          entropy: 1.3121890033994401
          entropy_coeff: 0.009999999999999998
          kl: 0.007941641992387708
          policy_loss: 0.002167681444968496
          total_loss: 0.10344541769119955
          vf_explained_var: 0.9846817851066589
          vf_loss: 0.10151963829994201
    num_agent_steps_sampled: 1358640
    num_agent_steps_trained: 1358640
    num_steps_sampled: 1358640
    num_steps_trained: 135

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,680,18642.1,1358640,9.1406,14.72,4.34,96.66


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1360638
  custom_metrics: {}
  date: 2021-11-09_08-56-41
  done: false
  episode_len_mean: 97.58
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 9.001800000000017
  episode_reward_min: 4.010000000000021
  episodes_this_iter: 19
  episodes_total: 13372
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.621829223632812
          cur_lr: 5.000000000000001e-05
          entropy: 1.248548750650315
          entropy_coeff: 0.009999999999999998
          kl: 0.005957962733000045
          policy_loss: -0.04336427308929463
          total_loss: 0.006140669641484108
          vf_explained_var: 0.9876665472984314
          vf_loss: 0.052327630087910665
    num_agent_steps_sampled: 1360638
    num_agent_steps_trained: 1360638
    num_steps_sampled: 1360638
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,681,18667.6,1360638,9.0018,14.72,4.01,97.58


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1362636
  custom_metrics: {}
  date: 2021-11-09_08-57-07
  done: false
  episode_len_mean: 99.19
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 8.969300000000018
  episode_reward_min: 4.010000000000021
  episodes_this_iter: 21
  episodes_total: 13393
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.621829223632812
          cur_lr: 5.000000000000001e-05
          entropy: 1.2566591481367746
          entropy_coeff: 0.009999999999999998
          kl: 0.0070156565926856075
          policy_loss: -0.05277776416568529
          total_loss: 0.06858783476054668
          vf_explained_var: 0.9753118753433228
          vf_loss: 0.12255399138444946
    num_agent_steps_sampled: 1362636
    num_agent_steps_trained: 1362636
    num_steps_sampled: 1362636
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,682,18693.7,1362636,8.9693,14.72,4.01,99.19


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1364634
  custom_metrics: {}
  date: 2021-11-09_08-57-31
  done: false
  episode_len_mean: 100.78
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 8.761900000000018
  episode_reward_min: 4.010000000000021
  episodes_this_iter: 19
  episodes_total: 13412
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.621829223632812
          cur_lr: 5.000000000000001e-05
          entropy: 1.315309273061298
          entropy_coeff: 0.009999999999999998
          kl: 0.015449277361608378
          policy_loss: -0.053531751480130924
          total_loss: 0.058717756257170725
          vf_explained_var: 0.9866886138916016
          vf_loss: 0.10034650812546413
    num_agent_steps_sampled: 1364634
    num_agent_steps_trained: 1364634
    num_steps_sampled: 1364634
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,683,18718.3,1364634,8.7619,14.71,4.01,100.78


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1366632
  custom_metrics: {}
  date: 2021-11-09_08-57-58
  done: false
  episode_len_mean: 100.68
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 8.701100000000018
  episode_reward_min: 3.0500000000000127
  episodes_this_iter: 21
  episodes_total: 13433
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.621829223632812
          cur_lr: 5.000000000000001e-05
          entropy: 1.2843522003718786
          entropy_coeff: 0.009999999999999998
          kl: 0.00862833461451106
          policy_loss: -0.0043237074500038514
          total_loss: 0.11661525954093252
          vf_explained_var: 0.978865385055542
          vf_loss: 0.11978880516475156
    num_agent_steps_sampled: 1366632
    num_agent_steps_trained: 1366632
    num_steps_sampled: 1366632
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,684,18744.8,1366632,8.7011,14.71,3.05,100.68


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1368630
  custom_metrics: {}
  date: 2021-11-09_08-58-25
  done: false
  episode_len_mean: 99.66
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 8.816000000000018
  episode_reward_min: 3.0500000000000127
  episodes_this_iter: 19
  episodes_total: 13452
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.621829223632812
          cur_lr: 5.000000000000001e-05
          entropy: 1.2738367353166853
          entropy_coeff: 0.009999999999999998
          kl: 0.006157152074554764
          policy_loss: -0.0757567801202337
          total_loss: 0.025904023602959656
          vf_explained_var: 0.9866077303886414
          vf_loss: 0.10441332156991674
    num_agent_steps_sampled: 1368630
    num_agent_steps_trained: 1368630
    num_steps_sampled: 1368630
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,685,18771.5,1368630,8.816,14.72,3.05,99.66


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1370628
  custom_metrics: {}
  date: 2021-11-09_08-58-49
  done: false
  episode_len_mean: 100.71
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 8.531900000000018
  episode_reward_min: 3.0500000000000127
  episodes_this_iter: 20
  episodes_total: 13472
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.621829223632812
          cur_lr: 5.000000000000001e-05
          entropy: 1.3428153038024901
          entropy_coeff: 0.009999999999999998
          kl: 0.0064412299718208
          policy_loss: -0.0394525288293759
          total_loss: 0.026905758403951215
          vf_explained_var: 0.9848993420600891
          vf_loss: 0.06933986671446335
    num_agent_steps_sampled: 1370628
    num_agent_steps_trained: 1370628
    num_steps_sampled: 1370628
    num_steps_trained: 137

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,686,18796,1370628,8.5319,14.72,3.05,100.71


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1372626
  custom_metrics: {}
  date: 2021-11-09_08-59-16
  done: false
  episode_len_mean: 99.71
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 8.751800000000017
  episode_reward_min: 3.0500000000000127
  episodes_this_iter: 20
  episodes_total: 13492
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.621829223632812
          cur_lr: 5.000000000000001e-05
          entropy: 1.2415805498758952
          entropy_coeff: 0.009999999999999998
          kl: 0.005914008515457899
          policy_loss: -0.01951642396549384
          total_loss: 0.04025724563925039
          vf_explained_var: 0.9915178418159485
          vf_loss: 0.06259796290347973
    num_agent_steps_sampled: 1372626
    num_agent_steps_trained: 1372626
    num_steps_sampled: 1372626
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,687,18822.9,1372626,8.7518,14.72,3.05,99.71


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1374624
  custom_metrics: {}
  date: 2021-11-09_08-59-42
  done: false
  episode_len_mean: 99.04
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 8.877600000000015
  episode_reward_min: 2.9100000000000175
  episodes_this_iter: 20
  episodes_total: 13512
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.621829223632812
          cur_lr: 5.000000000000001e-05
          entropy: 1.3226680823734829
          entropy_coeff: 0.009999999999999998
          kl: 0.0067968771569144205
          policy_loss: -0.03107089503180413
          total_loss: 0.1013406416844754
          vf_explained_var: 0.9734025001525879
          vf_loss: 0.1346148450459753
    num_agent_steps_sampled: 1374624
    num_agent_steps_trained: 1374624
    num_steps_sampled: 1374624
    num_steps_trained: 137

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,688,18848.4,1374624,8.8776,14.72,2.91,99.04




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1376622
  custom_metrics: {}
  date: 2021-11-09_09-00-21
  done: false
  episode_len_mean: 98.5
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 8.703000000000017
  episode_reward_min: -0.03
  episodes_this_iter: 22
  episodes_total: 13534
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.621829223632812
          cur_lr: 5.000000000000001e-05
          entropy: 1.2433201159749712
          entropy_coeff: 0.009999999999999998
          kl: 0.005592113518239455
          policy_loss: -0.06001191708658423
          total_loss: 0.06379068286291191
          vf_explained_var: 0.9818100929260254
          vf_loss: 0.12716634712581124
    num_agent_steps_sampled: 1376622
    num_agent_steps_trained: 1376622
    num_steps_sampled: 1376622
    num_steps_trained: 1376622
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,689,18887.2,1376622,8.703,14.71,-0.03,98.5




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1378620
  custom_metrics: {}
  date: 2021-11-09_09-01-17
  done: false
  episode_len_mean: 96.82
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 8.607100000000017
  episode_reward_min: -0.15000000000000002
  episodes_this_iter: 22
  episodes_total: 13556
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.621829223632812
          cur_lr: 5.000000000000001e-05
          entropy: 1.3395450932638986
          entropy_coeff: 0.009999999999999998
          kl: 0.004800749025442335
          policy_loss: -0.005823692360094616
          total_loss: 0.19197020178572052
          vf_explained_var: 0.9616831541061401
          vf_loss: 0.20340335058669248
    num_agent_steps_sampled: 1378620
    num_agent_steps_trained: 1378620
    num_steps_sampled: 1378620
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,690,18943.8,1378620,8.6071,14.71,-0.15,96.82


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1380618
  custom_metrics: {}
  date: 2021-11-09_09-01-46
  done: false
  episode_len_mean: 95.57
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 8.946800000000017
  episode_reward_min: -0.15000000000000002
  episodes_this_iter: 21
  episodes_total: 13577
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.2927107481729416
          entropy_coeff: 0.009999999999999998
          kl: 0.010467782327326267
          policy_loss: -0.003602953204175546
          total_loss: 0.13348480439523147
          vf_explained_var: 0.984724760055542
          vf_loss: 0.1415263874544984
    num_agent_steps_sampled: 1380618
    num_agent_steps_trained: 1380618
    num_steps_sampled: 1380618
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,691,18972.5,1380618,8.9468,14.71,-0.15,95.57


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1382616
  custom_metrics: {}
  date: 2021-11-09_09-02-14
  done: false
  episode_len_mean: 94.05
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 8.891800000000018
  episode_reward_min: -0.15000000000000002
  episodes_this_iter: 21
  episodes_total: 13598
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.2721043830826169
          entropy_coeff: 0.009999999999999998
          kl: 0.009710206886207846
          policy_loss: -0.07200060349312566
          total_loss: 0.026134473865940457
          vf_explained_var: 0.9855723977088928
          vf_loss: 0.10298197271213645
    num_agent_steps_sampled: 1382616
    num_agent_steps_trained: 1382616
    num_steps_sampled: 1382616
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,692,19000.8,1382616,8.8918,14.71,-0.15,94.05


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1384614
  custom_metrics: {}
  date: 2021-11-09_09-02-40
  done: false
  episode_len_mean: 95.36
  episode_media: {}
  episode_reward_max: 14.770000000000014
  episode_reward_mean: 8.880800000000017
  episode_reward_min: -0.15000000000000002
  episodes_this_iter: 20
  episodes_total: 13618
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3554275296983265
          entropy_coeff: 0.009999999999999998
          kl: 0.011626010565616357
          policy_loss: -0.051418148318216915
          total_loss: 0.09265112713467152
          vf_explained_var: 0.9800949692726135
          vf_loss: 0.14819584964286714
    num_agent_steps_sampled: 1384614
    num_agent_steps_trained: 1384614
    num_steps_sampled: 1384614
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,693,19026.1,1384614,8.8808,14.77,-0.15,95.36


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1386612
  custom_metrics: {}
  date: 2021-11-09_09-03-06
  done: false
  episode_len_mean: 96.84
  episode_media: {}
  episode_reward_max: 14.770000000000014
  episode_reward_mean: 8.948700000000017
  episode_reward_min: -0.15000000000000002
  episodes_this_iter: 20
  episodes_total: 13638
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3252718766530356
          entropy_coeff: 0.009999999999999998
          kl: 0.01259927665571874
          policy_loss: -0.019165671395049207
          total_loss: 0.024091794137798606
          vf_explained_var: 0.9863520860671997
          vf_loss: 0.04629324819626553
    num_agent_steps_sampled: 1386612
    num_agent_steps_trained: 1386612
    num_steps_sampled: 1386612
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,694,19052.4,1386612,8.9487,14.77,-0.15,96.84


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1388610
  custom_metrics: {}
  date: 2021-11-09_09-03-33
  done: false
  episode_len_mean: 97.25
  episode_media: {}
  episode_reward_max: 14.770000000000014
  episode_reward_mean: 9.31950000000002
  episode_reward_min: 4.290000000000022
  episodes_this_iter: 20
  episodes_total: 13658
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.2093930817785716
          entropy_coeff: 0.009999999999999998
          kl: 0.010795493346354488
          policy_loss: -0.02450297267309257
          total_loss: 0.025695695053963435
          vf_explained_var: 0.9906748533248901
          vf_loss: 0.05353837632352398
    num_agent_steps_sampled: 1388610
    num_agent_steps_trained: 1388610
    num_steps_sampled: 1388610
    num_steps_trained: 138

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,695,19079.3,1388610,9.3195,14.77,4.29,97.25


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1390608
  custom_metrics: {}
  date: 2021-11-09_09-03-58
  done: false
  episode_len_mean: 98.86
  episode_media: {}
  episode_reward_max: 14.770000000000014
  episode_reward_mean: 8.834700000000018
  episode_reward_min: 2.7600000000000167
  episodes_this_iter: 20
  episodes_total: 13678
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3315526525179544
          entropy_coeff: 0.009999999999999998
          kl: 0.009471123882516214
          policy_loss: -0.04954805828276135
          total_loss: 0.05370632562608946
          vf_explained_var: 0.9756466150283813
          vf_loss: 0.10888963871236358
    num_agent_steps_sampled: 1390608
    num_agent_steps_trained: 1390608
    num_steps_sampled: 1390608
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,696,19104.1,1390608,8.8347,14.77,2.76,98.86


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1392606
  custom_metrics: {}
  date: 2021-11-09_09-04-22
  done: false
  episode_len_mean: 101.72
  episode_media: {}
  episode_reward_max: 14.770000000000014
  episode_reward_mean: 8.629400000000018
  episode_reward_min: 2.7600000000000167
  episodes_this_iter: 19
  episodes_total: 13697
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3155990980920338
          entropy_coeff: 0.009999999999999998
          kl: 0.009762484711078562
          policy_loss: -0.02726903690823487
          total_loss: 0.08189096273410888
          vf_explained_var: 0.9821460247039795
          vf_loss: 0.11439944762913953
    num_agent_steps_sampled: 1392606
    num_agent_steps_trained: 1392606
    num_steps_sampled: 1392606
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,697,19128.5,1392606,8.6294,14.77,2.76,101.72


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1394604
  custom_metrics: {}
  date: 2021-11-09_09-04-48
  done: false
  episode_len_mean: 100.78
  episode_media: {}
  episode_reward_max: 14.760000000000014
  episode_reward_mean: 8.535800000000018
  episode_reward_min: 2.7600000000000167
  episodes_this_iter: 19
  episodes_total: 13716
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3443699859437488
          entropy_coeff: 0.009999999999999998
          kl: 0.013684543733235834
          policy_loss: -0.027837980893396196
          total_loss: 0.07579493566992737
          vf_explained_var: 0.964390218257904
          vf_loss: 0.10597962127732379
    num_agent_steps_sampled: 1394604
    num_agent_steps_trained: 1394604
    num_steps_sampled: 1394604
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,698,19154,1394604,8.5358,14.76,2.76,100.78


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1396602
  custom_metrics: {}
  date: 2021-11-09_09-05-14
  done: false
  episode_len_mean: 101.32
  episode_media: {}
  episode_reward_max: 14.760000000000014
  episode_reward_mean: 8.498600000000017
  episode_reward_min: 2.7600000000000167
  episodes_this_iter: 19
  episodes_total: 13735
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.2333616097768147
          entropy_coeff: 0.009999999999999998
          kl: 0.010746766872083905
          policy_loss: -0.0571823254405033
          total_loss: 0.021458234371883527
          vf_explained_var: 0.9888936877250671
          vf_loss: 0.0822594639179962
    num_agent_steps_sampled: 1396602
    num_agent_steps_trained: 1396602
    num_steps_sampled: 1396602
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,699,19180.3,1396602,8.4986,14.76,2.76,101.32


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1398600
  custom_metrics: {}
  date: 2021-11-09_09-05-43
  done: false
  episode_len_mean: 101.05
  episode_media: {}
  episode_reward_max: 14.670000000000012
  episode_reward_mean: 8.66740000000002
  episode_reward_min: 2.7600000000000167
  episodes_this_iter: 22
  episodes_total: 13757
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.19825515520005
          entropy_coeff: 0.009999999999999998
          kl: 0.01382894993892035
          policy_loss: -0.015305639767930621
          total_loss: 0.11432807807411466
          vf_explained_var: 0.9878824353218079
          vf_loss: 0.13040217596682765
    num_agent_steps_sampled: 1398600
    num_agent_steps_trained: 1398600
    num_steps_sampled: 1398600
    num_steps_trained: 1398

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,700,19209.1,1398600,8.6674,14.67,2.76,101.05


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1400598
  custom_metrics: {}
  date: 2021-11-09_09-06-08
  done: false
  episode_len_mean: 100.84
  episode_media: {}
  episode_reward_max: 14.670000000000012
  episode_reward_mean: 8.886700000000019
  episode_reward_min: 2.4500000000000117
  episodes_this_iter: 19
  episodes_total: 13776
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3206002717926388
          entropy_coeff: 0.009999999999999998
          kl: 0.006851144472114062
          policy_loss: -0.1352340169250965
          total_loss: -0.03465234695800713
          vf_explained_var: 0.9776091575622559
          vf_loss: 0.10823198109865188
    num_agent_steps_sampled: 1400598
    num_agent_steps_trained: 1400598
    num_steps_sampled: 1400598
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,701,19234,1400598,8.8867,14.67,2.45,100.84


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1402596
  custom_metrics: {}
  date: 2021-11-09_09-06-34
  done: false
  episode_len_mean: 100.39
  episode_media: {}
  episode_reward_max: 14.670000000000012
  episode_reward_mean: 9.182700000000018
  episode_reward_min: 2.4500000000000117
  episodes_this_iter: 21
  episodes_total: 13797
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.295796297277723
          entropy_coeff: 0.009999999999999998
          kl: 0.012315355200212383
          policy_loss: -0.006641550016190325
          total_loss: 0.1367329704087405
          vf_explained_var: 0.9847707152366638
          vf_loss: 0.14634578230657747
    num_agent_steps_sampled: 1402596
    num_agent_steps_trained: 1402596
    num_steps_sampled: 1402596
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,702,19260,1402596,9.1827,14.67,2.45,100.39


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1404594
  custom_metrics: {}
  date: 2021-11-09_09-07-01
  done: false
  episode_len_mean: 99.96
  episode_media: {}
  episode_reward_max: 14.670000000000014
  episode_reward_mean: 9.328900000000017
  episode_reward_min: 2.4500000000000117
  episodes_this_iter: 18
  episodes_total: 13815
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3112182526361376
          entropy_coeff: 0.009999999999999998
          kl: 0.006881269986514948
          policy_loss: -0.11725655265507244
          total_loss: -0.02106045557274705
          vf_explained_var: 0.9717731475830078
          vf_loss: 0.1037281553837515
    num_agent_steps_sampled: 1404594
    num_agent_steps_trained: 1404594
    num_steps_sampled: 1404594
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,703,19287,1404594,9.3289,14.67,2.45,99.96


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1406592
  custom_metrics: {}
  date: 2021-11-09_09-07-28
  done: false
  episode_len_mean: 98.56
  episode_media: {}
  episode_reward_max: 14.670000000000014
  episode_reward_mean: 9.504100000000017
  episode_reward_min: 2.4500000000000117
  episodes_this_iter: 22
  episodes_total: 13837
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.2038532864479792
          entropy_coeff: 0.009999999999999998
          kl: 0.010390783466387925
          policy_loss: -0.004999851701515062
          total_loss: 0.109050997843345
          vf_explained_var: 0.9874171614646912
          vf_loss: 0.11766334328623045
    num_agent_steps_sampled: 1406592
    num_agent_steps_trained: 1406592
    num_steps_sampled: 1406592
    num_steps_trained: 140

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,704,19314.4,1406592,9.5041,14.67,2.45,98.56


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1408590
  custom_metrics: {}
  date: 2021-11-09_09-07-54
  done: false
  episode_len_mean: 99.75
  episode_media: {}
  episode_reward_max: 14.670000000000014
  episode_reward_mean: 9.118900000000018
  episode_reward_min: 2.4500000000000117
  episodes_this_iter: 20
  episodes_total: 13857
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.2683304820741925
          entropy_coeff: 0.009999999999999998
          kl: 0.01928933892390318
          policy_loss: -0.025454692977170148
          total_loss: 0.1338591212761544
          vf_explained_var: 0.9784581065177917
          vf_loss: 0.15635511147834005
    num_agent_steps_sampled: 1408590
    num_agent_steps_trained: 1408590
    num_steps_sampled: 1408590
    num_steps_trained: 140

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,705,19340.2,1408590,9.1189,14.67,2.45,99.75


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1410588
  custom_metrics: {}
  date: 2021-11-09_09-08-18
  done: false
  episode_len_mean: 100.83
  episode_media: {}
  episode_reward_max: 14.670000000000014
  episode_reward_mean: 8.84140000000002
  episode_reward_min: 2.3800000000000194
  episodes_this_iter: 18
  episodes_total: 13875
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.367545599596841
          entropy_coeff: 0.009999999999999998
          kl: 0.009017536156109911
          policy_loss: -0.056267387473157474
          total_loss: 0.018105980026580037
          vf_explained_var: 0.9762893915176392
          vf_loss: 0.08073637047339054
    num_agent_steps_sampled: 1410588
    num_agent_steps_trained: 1410588
    num_steps_sampled: 1410588
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,706,19364.4,1410588,8.8414,14.67,2.38,100.83


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1412586
  custom_metrics: {}
  date: 2021-11-09_09-08-45
  done: false
  episode_len_mean: 101.24
  episode_media: {}
  episode_reward_max: 14.670000000000014
  episode_reward_mean: 8.60570000000002
  episode_reward_min: 2.3800000000000194
  episodes_this_iter: 20
  episodes_total: 13895
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3285768946011862
          entropy_coeff: 0.009999999999999998
          kl: 0.013331427538354809
          policy_loss: -0.033077572605439594
          total_loss: 0.11261446782875628
          vf_explained_var: 0.9804988503456116
          vf_loss: 0.14816715923093615
    num_agent_steps_sampled: 1412586
    num_agent_steps_trained: 1412586
    num_steps_sampled: 1412586
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,707,19390.8,1412586,8.6057,14.67,2.38,101.24




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1414584
  custom_metrics: {}
  date: 2021-11-09_09-09-31
  done: false
  episode_len_mean: 97.96
  episode_media: {}
  episode_reward_max: 14.640000000000013
  episode_reward_mean: 8.408900000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 23
  episodes_total: 13918
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.2505701479457674
          entropy_coeff: 0.009999999999999998
          kl: 0.00810755379948203
          policy_loss: -0.01854188335793359
          total_loss: 0.33633456913133464
          vf_explained_var: 0.945305585861206
          vf_loss: 0.3608076205299724
    num_agent_steps_sampled: 1414584
    num_agent_steps_trained: 1414584
    num_steps_sampled: 1414584
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,708,19436.8,1414584,8.4089,14.64,-0.06,97.96


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1416582
  custom_metrics: {}
  date: 2021-11-09_09-09-56
  done: false
  episode_len_mean: 98.86
  episode_media: {}
  episode_reward_max: 14.640000000000013
  episode_reward_mean: 8.141900000000016
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 13939
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3004207849502563
          entropy_coeff: 0.009999999999999998
          kl: 0.015985798015870956
          policy_loss: -0.05681214658986954
          total_loss: 0.18876212109323767
          vf_explained_var: 0.9635576009750366
          vf_loss: 0.24561535723152614
    num_agent_steps_sampled: 1416582
    num_agent_steps_trained: 1416582
    num_steps_sampled: 1416582
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,709,19461.6,1416582,8.1419,14.64,-0.06,98.86


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1418580
  custom_metrics: {}
  date: 2021-11-09_09-10-21
  done: false
  episode_len_mean: 97.75
  episode_media: {}
  episode_reward_max: 14.640000000000013
  episode_reward_mean: 8.415900000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 13959
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.2797698616981505
          entropy_coeff: 0.009999999999999998
          kl: 0.011893132661756853
          policy_loss: -0.022780553588554974
          total_loss: 0.13693864293219077
          vf_explained_var: 0.9784082174301147
          vf_loss: 0.1628725815741789
    num_agent_steps_sampled: 1418580
    num_agent_steps_trained: 1418580
    num_steps_sampled: 1418580
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,710,19487.2,1418580,8.4159,14.64,-0.06,97.75


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1420578
  custom_metrics: {}
  date: 2021-11-09_09-10-47
  done: false
  episode_len_mean: 96.44
  episode_media: {}
  episode_reward_max: 14.550000000000015
  episode_reward_mean: 8.465000000000018
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 13979
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.2638129977952866
          entropy_coeff: 0.009999999999999998
          kl: 0.014140880565890626
          policy_loss: -0.05343586547921101
          total_loss: 0.14916546085760707
          vf_explained_var: 0.9736220836639404
          vf_loss: 0.20377240895870186
    num_agent_steps_sampled: 1420578
    num_agent_steps_trained: 1420578
    num_steps_sampled: 1420578
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,711,19513.3,1420578,8.465,14.55,-0.06,96.44


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1422576
  custom_metrics: {}
  date: 2021-11-09_09-11-14
  done: false
  episode_len_mean: 94.34
  episode_media: {}
  episode_reward_max: 14.630000000000015
  episode_reward_mean: 8.516800000000016
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 22
  episodes_total: 14001
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.1847849249839784
          entropy_coeff: 0.009999999999999998
          kl: 0.010906800150229672
          policy_loss: -0.015475643231045631
          total_loss: 0.1261001084177267
          vf_explained_var: 0.9736136794090271
          vf_loss: 0.14457911863213493
    num_agent_steps_sampled: 1422576
    num_agent_steps_trained: 1422576
    num_steps_sampled: 1422576
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,712,19539.5,1422576,8.5168,14.63,-0.06,94.34


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1424574
  custom_metrics: {}
  date: 2021-11-09_09-11-38
  done: false
  episode_len_mean: 97.71
  episode_media: {}
  episode_reward_max: 14.630000000000015
  episode_reward_mean: 8.603500000000018
  episode_reward_min: 2.560000000000019
  episodes_this_iter: 19
  episodes_total: 14020
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3898662550108773
          entropy_coeff: 0.009999999999999998
          kl: 0.010506506437683453
          policy_loss: -0.06077478217581908
          total_loss: 0.020398384447963464
          vf_explained_var: 0.9858205318450928
          vf_loss: 0.08655194848598469
    num_agent_steps_sampled: 1424574
    num_agent_steps_trained: 1424574
    num_steps_sampled: 1424574
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,713,19563.7,1424574,8.6035,14.63,2.56,97.71


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1426572
  custom_metrics: {}
  date: 2021-11-09_09-12-04
  done: false
  episode_len_mean: 97.25
  episode_media: {}
  episode_reward_max: 14.630000000000015
  episode_reward_mean: 8.987800000000016
  episode_reward_min: 4.050000000000024
  episodes_this_iter: 21
  episodes_total: 14041
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.283646705604735
          entropy_coeff: 0.009999999999999998
          kl: 0.011960673184501601
          policy_loss: -0.0598969089399491
          total_loss: 0.11982433633612735
          vf_explained_var: 0.9832010865211487
          vf_loss: 0.18285862654447554
    num_agent_steps_sampled: 1426572
    num_agent_steps_trained: 1426572
    num_steps_sampled: 1426572
    num_steps_trained: 14265

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,714,19590,1426572,8.9878,14.63,4.05,97.25


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1428570
  custom_metrics: {}
  date: 2021-11-09_09-12-29
  done: false
  episode_len_mean: 98.21
  episode_media: {}
  episode_reward_max: 14.630000000000015
  episode_reward_mean: 8.920600000000018
  episode_reward_min: 2.04000000000003
  episodes_this_iter: 20
  episodes_total: 14061
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.4393076652572268
          entropy_coeff: 0.009999999999999998
          kl: 0.010062287145701019
          policy_loss: -0.030382744914719036
          total_loss: 0.08409307568023602
          vf_explained_var: 0.9835439324378967
          vf_loss: 0.12070924055186055
    num_agent_steps_sampled: 1428570
    num_agent_steps_trained: 1428570
    num_steps_sampled: 1428570
    num_steps_trained: 142

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,715,19614.7,1428570,8.9206,14.63,2.04,98.21


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1430568
  custom_metrics: {}
  date: 2021-11-09_09-12-55
  done: false
  episode_len_mean: 99.12
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 8.863000000000019
  episode_reward_min: 2.04000000000003
  episodes_this_iter: 20
  episodes_total: 14081
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3018848027501788
          entropy_coeff: 0.009999999999999998
          kl: 0.013983792233172126
          policy_loss: -0.05473707162198566
          total_loss: 0.08569193365318435
          vf_explained_var: 0.9665969014167786
          vf_loss: 0.14210818997096447
    num_agent_steps_sampled: 1430568
    num_agent_steps_trained: 1430568
    num_steps_sampled: 1430568
    num_steps_trained: 1430

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,716,19640.6,1430568,8.863,14.71,2.04,99.12


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1432566
  custom_metrics: {}
  date: 2021-11-09_09-13-20
  done: false
  episode_len_mean: 100.09
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 8.94220000000002
  episode_reward_min: 2.04000000000003
  episodes_this_iter: 20
  episodes_total: 14101
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3651200606709435
          entropy_coeff: 0.009999999999999998
          kl: 0.009418473009130785
          policy_loss: 0.007640982507949784
          total_loss: 0.06691027070794786
          vf_explained_var: 0.9900712966918945
          vf_loss: 0.06528291017526672
    num_agent_steps_sampled: 1432566
    num_agent_steps_trained: 1432566
    num_steps_sampled: 1432566
    num_steps_trained: 1432

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,717,19665.9,1432566,8.9422,14.71,2.04,100.09


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1434564
  custom_metrics: {}
  date: 2021-11-09_09-13-45
  done: false
  episode_len_mean: 99.2
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 8.971000000000016
  episode_reward_min: 2.04000000000003
  episodes_this_iter: 20
  episodes_total: 14121
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3411444124721346
          entropy_coeff: 0.009999999999999998
          kl: 0.010797664821245504
          policy_loss: -0.053732581028626075
          total_loss: 0.06073695449602036
          vf_explained_var: 0.9834532737731934
          vf_loss: 0.1191249972830216
    num_agent_steps_sampled: 1434564
    num_agent_steps_trained: 1434564
    num_steps_sampled: 1434564
    num_steps_trained: 14345

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,718,19691,1434564,8.971,14.71,2.04,99.2


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1436562
  custom_metrics: {}
  date: 2021-11-09_09-14-11
  done: false
  episode_len_mean: 99.09
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 9.005200000000016
  episode_reward_min: 2.04000000000003
  episodes_this_iter: 20
  episodes_total: 14141
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3037128068151929
          entropy_coeff: 0.009999999999999998
          kl: 0.008691990720124844
          policy_loss: -0.02323718390294484
          total_loss: 0.030374400370887348
          vf_explained_var: 0.9944575428962708
          vf_loss: 0.05960024976659389
    num_agent_steps_sampled: 1436562
    num_agent_steps_trained: 1436562
    num_steps_sampled: 1436562
    num_steps_trained: 143

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,719,19716.3,1436562,9.0052,14.71,2.04,99.09


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1438560
  custom_metrics: {}
  date: 2021-11-09_09-14-37
  done: false
  episode_len_mean: 98.06
  episode_media: {}
  episode_reward_max: 14.710000000000013
  episode_reward_mean: 8.992400000000016
  episode_reward_min: 3.7900000000000276
  episodes_this_iter: 22
  episodes_total: 14163
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3613367903800238
          entropy_coeff: 0.009999999999999998
          kl: 0.010008571995937784
          policy_loss: -0.016655310881989342
          total_loss: 0.13637435273045584
          vf_explained_var: 0.9823057651519775
          vf_loss: 0.15852693377860955
    num_agent_steps_sampled: 1438560
    num_agent_steps_trained: 1438560
    num_steps_sampled: 1438560
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,720,19742.2,1438560,8.9924,14.71,3.79,98.06


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1440558
  custom_metrics: {}
  date: 2021-11-09_09-15-03
  done: false
  episode_len_mean: 97.56
  episode_media: {}
  episode_reward_max: 14.680000000000012
  episode_reward_mean: 9.351500000000017
  episode_reward_min: 6.020000000000017
  episodes_this_iter: 20
  episodes_total: 14183
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.376398898306347
          entropy_coeff: 0.009999999999999998
          kl: 0.012197637569473459
          policy_loss: -0.026145340182951518
          total_loss: 0.07024775284032027
          vf_explained_var: 0.989946186542511
          vf_loss: 0.10026583929679224
    num_agent_steps_sampled: 1440558
    num_agent_steps_trained: 1440558
    num_steps_sampled: 1440558
    num_steps_trained: 1440

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,721,19768.7,1440558,9.3515,14.68,6.02,97.56


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1442556
  custom_metrics: {}
  date: 2021-11-09_09-15-29
  done: false
  episode_len_mean: 96.85
  episode_media: {}
  episode_reward_max: 14.720000000000015
  episode_reward_mean: 9.513700000000018
  episode_reward_min: 4.40000000000002
  episodes_this_iter: 21
  episodes_total: 14204
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.2734625393436068
          entropy_coeff: 0.009999999999999998
          kl: 0.008345899529486223
          policy_loss: -0.03366851003042289
          total_loss: 0.05077763444611005
          vf_explained_var: 0.9903940558433533
          vf_loss: 0.0904129566713458
    num_agent_steps_sampled: 1442556
    num_agent_steps_trained: 1442556
    num_steps_sampled: 1442556
    num_steps_trained: 14425

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,722,19794.3,1442556,9.5137,14.72,4.4,96.85


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1444554
  custom_metrics: {}
  date: 2021-11-09_09-15-54
  done: false
  episode_len_mean: 97.34
  episode_media: {}
  episode_reward_max: 14.720000000000015
  episode_reward_mean: 9.178900000000018
  episode_reward_min: 4.09000000000002
  episodes_this_iter: 20
  episodes_total: 14224
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3948853447323755
          entropy_coeff: 0.009999999999999998
          kl: 0.0137715748408687
          policy_loss: -0.032554617417710166
          total_loss: 0.07501030473836831
          vf_explained_var: 0.9793698191642761
          vf_loss: 0.11034620351025036
    num_agent_steps_sampled: 1444554
    num_agent_steps_trained: 1444554
    num_steps_sampled: 1444554
    num_steps_trained: 14445

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,723,19819.3,1444554,9.1789,14.72,4.09,97.34


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1446552
  custom_metrics: {}
  date: 2021-11-09_09-16-19
  done: false
  episode_len_mean: 98.02
  episode_media: {}
  episode_reward_max: 14.720000000000015
  episode_reward_mean: 8.925900000000018
  episode_reward_min: 4.09000000000002
  episodes_this_iter: 20
  episodes_total: 14244
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3157034181413196
          entropy_coeff: 0.009999999999999998
          kl: 0.013054119493138356
          policy_loss: -0.013813453753079687
          total_loss: 0.2439291763163748
          vf_explained_var: 0.968942403793335
          vf_loss: 0.26031388665239014
    num_agent_steps_sampled: 1446552
    num_agent_steps_trained: 1446552
    num_steps_sampled: 1446552
    num_steps_trained: 14465

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,724,19844.7,1446552,8.9259,14.72,4.09,98.02




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1448550
  custom_metrics: {}
  date: 2021-11-09_09-17-02
  done: false
  episode_len_mean: 96.29
  episode_media: {}
  episode_reward_max: 14.720000000000015
  episode_reward_mean: 8.81620000000002
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 22
  episodes_total: 14266
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3380583734739395
          entropy_coeff: 0.009999999999999998
          kl: 0.015794081772538552
          policy_loss: 0.015332226614866938
          total_loss: 0.5111816029818285
          vf_explained_var: 0.9383745193481445
          vf_loss: 0.49642230286484673
    num_agent_steps_sampled: 1448550
    num_agent_steps_trained: 1448550
    num_steps_sampled: 1448550
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,725,19887.3,1448550,8.8162,14.72,-0.06,96.29




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1450548
  custom_metrics: {}
  date: 2021-11-09_09-17-40
  done: false
  episode_len_mean: 96.85
  episode_media: {}
  episode_reward_max: 14.720000000000015
  episode_reward_mean: 8.417700000000016
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 14286
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.4408395338626134
          entropy_coeff: 0.009999999999999998
          kl: 0.01083991841691267
          policy_loss: -0.08202542591662634
          total_loss: 0.04049351210040705
          vf_explained_var: 0.9768261313438416
          vf_loss: 0.12813708478850977
    num_agent_steps_sampled: 1450548
    num_agent_steps_trained: 1450548
    num_steps_sampled: 1450548
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,726,19925.2,1450548,8.4177,14.72,-0.06,96.85


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1452546
  custom_metrics: {}
  date: 2021-11-09_09-18-05
  done: false
  episode_len_mean: 97.86
  episode_media: {}
  episode_reward_max: 14.550000000000015
  episode_reward_mean: 8.173200000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 14307
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3066244710059394
          entropy_coeff: 0.009999999999999998
          kl: 0.0177831616465032
          policy_loss: -0.026724471692882834
          total_loss: 0.3243134947404975
          vf_explained_var: 0.9593973755836487
          vf_loss: 0.34968357877362344
    num_agent_steps_sampled: 1452546
    num_agent_steps_trained: 1452546
    num_steps_sampled: 1452546
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,727,19950.6,1452546,8.1732,14.55,-0.06,97.86


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1454544
  custom_metrics: {}
  date: 2021-11-09_09-18-31
  done: false
  episode_len_mean: 97.87
  episode_media: {}
  episode_reward_max: 14.690000000000014
  episode_reward_mean: 8.525800000000016
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 14326
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3555492781457448
          entropy_coeff: 0.009999999999999998
          kl: 0.013693946535145135
          policy_loss: -0.016771529881017547
          total_loss: 0.17864038470600332
          vf_explained_var: 0.9812930226325989
          vf_loss: 0.19786278185035502
    num_agent_steps_sampled: 1454544
    num_agent_steps_trained: 1454544
    num_steps_sampled: 1454544
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,728,19976.1,1454544,8.5258,14.69,-0.06,97.87


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1456542
  custom_metrics: {}
  date: 2021-11-09_09-18-55
  done: false
  episode_len_mean: 97.82
  episode_media: {}
  episode_reward_max: 14.690000000000014
  episode_reward_mean: 8.597400000000018
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 14346
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.4168452773775373
          entropy_coeff: 0.009999999999999998
          kl: 0.014484459326759978
          policy_loss: -0.019293680893523353
          total_loss: 0.2188887469027014
          vf_explained_var: 0.9649420380592346
          vf_loss: 0.24060521786588998
    num_agent_steps_sampled: 1456542
    num_agent_steps_trained: 1456542
    num_steps_sampled: 1456542
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,729,20000.4,1456542,8.5974,14.69,-0.06,97.82


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1458540
  custom_metrics: {}
  date: 2021-11-09_09-19-21
  done: false
  episode_len_mean: 99.15
  episode_media: {}
  episode_reward_max: 14.690000000000014
  episode_reward_mean: 8.864100000000017
  episode_reward_min: 2.240000000000014
  episodes_this_iter: 21
  episodes_total: 14367
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.255632468064626
          entropy_coeff: 0.009999999999999998
          kl: 0.011781849452241151
          policy_loss: -0.021836299990259465
          total_loss: 0.21769836745446636
          vf_explained_var: 0.971335768699646
          vf_loss: 0.24253691945757186
    num_agent_steps_sampled: 1458540
    num_agent_steps_trained: 1458540
    num_steps_sampled: 1458540
    num_steps_trained: 1458

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,730,20026.7,1458540,8.8641,14.69,2.24,99.15


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1460538
  custom_metrics: {}
  date: 2021-11-09_09-19-47
  done: false
  episode_len_mean: 98.17
  episode_media: {}
  episode_reward_max: 14.690000000000014
  episode_reward_mean: 8.875400000000017
  episode_reward_min: 2.240000000000014
  episodes_this_iter: 21
  episodes_total: 14388
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.284760924748012
          entropy_coeff: 0.009999999999999998
          kl: 0.011622344286517818
          policy_loss: -0.01653712076090631
          total_loss: 0.18227842544161138
          vf_explained_var: 0.9699729084968567
          vf_loss: 0.2022384255769707
    num_agent_steps_sampled: 1460538
    num_agent_steps_trained: 1460538
    num_steps_sampled: 1460538
    num_steps_trained: 14605

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,731,20052,1460538,8.8754,14.69,2.24,98.17


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1462536
  custom_metrics: {}
  date: 2021-11-09_09-20-12
  done: false
  episode_len_mean: 98.02
  episode_media: {}
  episode_reward_max: 14.690000000000014
  episode_reward_mean: 9.244300000000017
  episode_reward_min: 2.7000000000000184
  episodes_this_iter: 20
  episodes_total: 14408
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3360064756302608
          entropy_coeff: 0.009999999999999998
          kl: 0.010349919547245621
          policy_loss: -0.06597645657048339
          total_loss: 0.0940927607672555
          vf_explained_var: 0.9790105819702148
          vf_loss: 0.1650363821891092
    num_agent_steps_sampled: 1462536
    num_agent_steps_trained: 1462536
    num_steps_sampled: 1462536
    num_steps_trained: 1462

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,732,20077.6,1462536,9.2443,14.69,2.7,98.02


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1464534
  custom_metrics: {}
  date: 2021-11-09_09-20-36
  done: false
  episode_len_mean: 98.43
  episode_media: {}
  episode_reward_max: 14.630000000000015
  episode_reward_mean: 8.865000000000018
  episode_reward_min: 3.0400000000000156
  episodes_this_iter: 19
  episodes_total: 14427
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.4012778957684835
          entropy_coeff: 0.009999999999999998
          kl: 0.011349619038908573
          policy_loss: -0.016447316393965766
          total_loss: 0.14325855683890126
          vf_explained_var: 0.977283775806427
          vf_loss: 0.16451508053356692
    num_agent_steps_sampled: 1464534
    num_agent_steps_trained: 1464534
    num_steps_sampled: 1464534
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,733,20101.7,1464534,8.865,14.63,3.04,98.43


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1466532
  custom_metrics: {}
  date: 2021-11-09_09-21-02
  done: false
  episode_len_mean: 98.78
  episode_media: {}
  episode_reward_max: 14.610000000000014
  episode_reward_mean: 8.927100000000017
  episode_reward_min: 3.0400000000000156
  episodes_this_iter: 21
  episodes_total: 14448
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.40463026818775
          entropy_coeff: 0.009999999999999998
          kl: 0.011156920271087974
          policy_loss: -0.02730188878873984
          total_loss: 0.19583196712746506
          vf_explained_var: 0.9691962599754333
          vf_loss: 0.22813285037520387
    num_agent_steps_sampled: 1466532
    num_agent_steps_trained: 1466532
    num_steps_sampled: 1466532
    num_steps_trained: 1466

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,734,20127,1466532,8.9271,14.61,3.04,98.78


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1468530
  custom_metrics: {}
  date: 2021-11-09_09-21-28
  done: false
  episode_len_mean: 98.5
  episode_media: {}
  episode_reward_max: 14.620000000000015
  episode_reward_mean: 9.031900000000014
  episode_reward_min: 3.0400000000000156
  episodes_this_iter: 20
  episodes_total: 14468
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.2884160643532163
          entropy_coeff: 0.009999999999999998
          kl: 0.01556849931937716
          policy_loss: -0.004182571740377517
          total_loss: 0.2679397824708195
          vf_explained_var: 0.9700998067855835
          vf_loss: 0.2723817912063428
    num_agent_steps_sampled: 1468530
    num_agent_steps_trained: 1468530
    num_steps_sampled: 1468530
    num_steps_trained: 14685

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,735,20153.1,1468530,9.0319,14.62,3.04,98.5


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1470528
  custom_metrics: {}
  date: 2021-11-09_09-21-53
  done: false
  episode_len_mean: 100.0
  episode_media: {}
  episode_reward_max: 14.620000000000015
  episode_reward_mean: 9.126900000000017
  episode_reward_min: 2.8200000000000176
  episodes_this_iter: 19
  episodes_total: 14487
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.4024760371162779
          entropy_coeff: 0.009999999999999998
          kl: 0.014763886808595006
          policy_loss: -0.04632566686542261
          total_loss: 0.2219617084910472
          vf_explained_var: 0.9553524851799011
          vf_loss: 0.27033988787304786
    num_agent_steps_sampled: 1470528
    num_agent_steps_trained: 1470528
    num_steps_sampled: 1470528
    num_steps_trained: 147

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,736,20178.1,1470528,9.1269,14.62,2.82,100


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1472526
  custom_metrics: {}
  date: 2021-11-09_09-22-20
  done: false
  episode_len_mean: 101.55
  episode_media: {}
  episode_reward_max: 14.620000000000015
  episode_reward_mean: 8.774900000000017
  episode_reward_min: 2.5300000000000233
  episodes_this_iter: 19
  episodes_total: 14506
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.4111402500243413
          entropy_coeff: 0.009999999999999998
          kl: 0.008770753231209127
          policy_loss: 0.011650483292483148
          total_loss: 0.11237498610502197
          vf_explained_var: 0.9784845113754272
          vf_loss: 0.10772357448225929
    num_agent_steps_sampled: 1472526
    num_agent_steps_trained: 1472526
    num_steps_sampled: 1472526
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,737,20204.8,1472526,8.7749,14.62,2.53,101.55


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1474524
  custom_metrics: {}
  date: 2021-11-09_09-22-46
  done: false
  episode_len_mean: 101.13
  episode_media: {}
  episode_reward_max: 14.620000000000015
  episode_reward_mean: 8.918000000000019
  episode_reward_min: 2.5300000000000233
  episodes_this_iter: 20
  episodes_total: 14526
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3654576874914623
          entropy_coeff: 0.009999999999999998
          kl: 0.010941719558483573
          policy_loss: -0.08572297587635971
          total_loss: 0.05570207302059446
          vf_explained_var: 0.9854839444160461
          vf_loss: 0.14620682669005225
    num_agent_steps_sampled: 1474524
    num_agent_steps_trained: 1474524
    num_steps_sampled: 1474524
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,738,20230.7,1474524,8.918,14.62,2.53,101.13


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1476522
  custom_metrics: {}
  date: 2021-11-09_09-23-18
  done: false
  episode_len_mean: 101.38
  episode_media: {}
  episode_reward_max: 14.620000000000015
  episode_reward_mean: 8.897700000000018
  episode_reward_min: 2.5300000000000233
  episodes_this_iter: 20
  episodes_total: 14546
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3784159700075784
          entropy_coeff: 0.009999999999999998
          kl: 0.014680687126363282
          policy_loss: -0.038714049304170266
          total_loss: 0.19034699983894826
          vf_explained_var: 0.9499005079269409
          vf_loss: 0.23094042834071887
    num_agent_steps_sampled: 1476522
    num_agent_steps_trained: 1476522
    num_steps_sampled: 1476522
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,739,20262.9,1476522,8.8977,14.62,2.53,101.38


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1478520
  custom_metrics: {}
  date: 2021-11-09_09-23-47
  done: false
  episode_len_mean: 103.06
  episode_media: {}
  episode_reward_max: 14.620000000000015
  episode_reward_mean: 8.608100000000016
  episode_reward_min: 2.5300000000000233
  episodes_this_iter: 19
  episodes_total: 14565
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.2951421170007615
          entropy_coeff: 0.009999999999999998
          kl: 0.013674452472592573
          policy_loss: -0.03320092959772973
          total_loss: 0.14939617403738556
          vf_explained_var: 0.9607049822807312
          vf_loss: 0.1844597123385895
    num_agent_steps_sampled: 1478520
    num_agent_steps_trained: 1478520
    num_steps_sampled: 1478520
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,740,20292,1478520,8.6081,14.62,2.53,103.06


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1480518
  custom_metrics: {}
  date: 2021-11-09_09-24-12
  done: false
  episode_len_mean: 102.19
  episode_media: {}
  episode_reward_max: 14.430000000000012
  episode_reward_mean: 8.140100000000016
  episode_reward_min: 0.8500000000000068
  episodes_this_iter: 20
  episodes_total: 14585
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.4008424957593282
          entropy_coeff: 0.009999999999999998
          kl: 0.009497220103103946
          policy_loss: -0.06828908817399115
          total_loss: 0.053263115492605026
          vf_explained_var: 0.9783375263214111
          vf_loss: 0.12785919509118512
    num_agent_steps_sampled: 1480518
    num_agent_steps_trained: 1480518
    num_steps_sampled: 1480518
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,741,20316.9,1480518,8.1401,14.43,0.85,102.19




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1482516
  custom_metrics: {}
  date: 2021-11-09_09-24-51
  done: false
  episode_len_mean: 100.26
  episode_media: {}
  episode_reward_max: 14.510000000000018
  episode_reward_mean: 8.290200000000016
  episode_reward_min: -0.07
  episodes_this_iter: 21
  episodes_total: 14606
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3387184915088473
          entropy_coeff: 0.009999999999999998
          kl: 0.008559552786776675
          policy_loss: -0.012723932770036516
          total_loss: 0.16879298589414074
          vf_explained_var: 0.9630074501037598
          vf_loss: 0.18796303588010016
    num_agent_steps_sampled: 1482516
    num_agent_steps_trained: 1482516
    num_steps_sampled: 1482516
    num_steps_trained: 1482516
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,742,20356.4,1482516,8.2902,14.51,-0.07,100.26




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1484514
  custom_metrics: {}
  date: 2021-11-09_09-25-47
  done: false
  episode_len_mean: 99.07
  episode_media: {}
  episode_reward_max: 14.510000000000018
  episode_reward_mean: 7.916200000000016
  episode_reward_min: -0.07
  episodes_this_iter: 21
  episodes_total: 14627
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3156664138748533
          entropy_coeff: 0.009999999999999998
          kl: 0.009198049562641567
          policy_loss: -0.07803447651011604
          total_loss: 0.060144736174316635
          vf_explained_var: 0.975882887840271
          vf_loss: 0.14387704424027886
    num_agent_steps_sampled: 1484514
    num_agent_steps_trained: 1484514
    num_steps_sampled: 1484514
    num_steps_trained: 1484514
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,743,20411.5,1484514,7.9162,14.51,-0.07,99.07


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1486512
  custom_metrics: {}
  date: 2021-11-09_09-26-12
  done: false
  episode_len_mean: 98.39
  episode_media: {}
  episode_reward_max: 14.510000000000018
  episode_reward_mean: 8.117900000000017
  episode_reward_min: -0.07
  episodes_this_iter: 19
  episodes_total: 14646
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.2852475569361732
          entropy_coeff: 0.009999999999999998
          kl: 0.009964728507952655
          policy_loss: -0.023212738902795883
          total_loss: 0.08452968994998152
          vf_explained_var: 0.9823452830314636
          vf_loss: 0.11251435801386833
    num_agent_steps_sampled: 1486512
    num_agent_steps_trained: 1486512
    num_steps_sampled: 1486512
    num_steps_trained: 1486512
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,744,20436.8,1486512,8.1179,14.51,-0.07,98.39


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1488510
  custom_metrics: {}
  date: 2021-11-09_09-26-36
  done: false
  episode_len_mean: 98.81
  episode_media: {}
  episode_reward_max: 14.670000000000014
  episode_reward_mean: 8.19130000000002
  episode_reward_min: -0.07
  episodes_this_iter: 20
  episodes_total: 14666
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3580980306579953
          entropy_coeff: 0.009999999999999998
          kl: 0.012534103833421644
          policy_loss: -0.0009969731171925863
          total_loss: 0.15387288630008697
          vf_explained_var: 0.9787412881851196
          vf_loss: 0.1582867538645154
    num_agent_steps_sampled: 1488510
    num_agent_steps_trained: 1488510
    num_steps_sampled: 1488510
    num_steps_trained: 1488510
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,745,20461,1488510,8.1913,14.67,-0.07,98.81


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1490508
  custom_metrics: {}
  date: 2021-11-09_09-27-00
  done: false
  episode_len_mean: 99.96
  episode_media: {}
  episode_reward_max: 14.670000000000014
  episode_reward_mean: 8.533500000000018
  episode_reward_min: -0.07
  episodes_this_iter: 19
  episodes_total: 14685
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3102842609087626
          entropy_coeff: 0.009999999999999998
          kl: 0.009852372957830216
          policy_loss: -0.030123441400272507
          total_loss: 0.11099113364304815
          vf_explained_var: 0.9806880354881287
          vf_loss: 0.14622798290635858
    num_agent_steps_sampled: 1490508
    num_agent_steps_trained: 1490508
    num_steps_sampled: 1490508
    num_steps_trained: 1490508
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,746,20484.9,1490508,8.5335,14.67,-0.07,99.96


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1492506
  custom_metrics: {}
  date: 2021-11-09_09-27-25
  done: false
  episode_len_mean: 100.65
  episode_media: {}
  episode_reward_max: 14.670000000000014
  episode_reward_mean: 8.774300000000016
  episode_reward_min: -0.05
  episodes_this_iter: 20
  episodes_total: 14705
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3877046868914649
          entropy_coeff: 0.009999999999999998
          kl: 0.010413579273365524
          policy_loss: -0.050537983878027826
          total_loss: 0.08633773692307017
          vf_explained_var: 0.9836568236351013
          vf_loss: 0.14230824118213994
    num_agent_steps_sampled: 1492506
    num_agent_steps_trained: 1492506
    num_steps_sampled: 1492506
    num_steps_trained: 1492506
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,747,20509.3,1492506,8.7743,14.67,-0.05,100.65


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1494504
  custom_metrics: {}
  date: 2021-11-09_09-27-49
  done: false
  episode_len_mean: 102.17
  episode_media: {}
  episode_reward_max: 14.670000000000014
  episode_reward_mean: 8.764200000000018
  episode_reward_min: 2.4600000000000213
  episodes_this_iter: 20
  episodes_total: 14725
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3870780047916231
          entropy_coeff: 0.009999999999999998
          kl: 0.008174703156904682
          policy_loss: -0.054882796534470146
          total_loss: 0.009300771462065833
          vf_explained_var: 0.988538384437561
          vf_loss: 0.07142536186923583
    num_agent_steps_sampled: 1494504
    num_agent_steps_trained: 1494504
    num_steps_sampled: 1494504
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,748,20533.8,1494504,8.7642,14.67,2.46,102.17


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1496502
  custom_metrics: {}
  date: 2021-11-09_09-28-14
  done: false
  episode_len_mean: 101.33
  episode_media: {}
  episode_reward_max: 14.670000000000014
  episode_reward_mean: 8.85680000000002
  episode_reward_min: 4.580000000000021
  episodes_this_iter: 20
  episodes_total: 14745
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3014665944235666
          entropy_coeff: 0.009999999999999998
          kl: 0.008507562814928416
          policy_loss: -0.07959831158320109
          total_loss: -0.018066416467939106
          vf_explained_var: 0.9897040724754333
          vf_loss: 0.06764765281585
    num_agent_steps_sampled: 1496502
    num_agent_steps_trained: 1496502
    num_steps_sampled: 1496502
    num_steps_trained: 1496

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,749,20558.7,1496502,8.8568,14.67,4.58,101.33


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1498500
  custom_metrics: {}
  date: 2021-11-09_09-28-38
  done: false
  episode_len_mean: 100.42
  episode_media: {}
  episode_reward_max: 14.610000000000012
  episode_reward_mean: 8.982800000000019
  episode_reward_min: 4.400000000000022
  episodes_this_iter: 20
  episodes_total: 14765
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3294279098510742
          entropy_coeff: 0.009999999999999998
          kl: 0.016628562783175413
          policy_loss: -0.042413101469477014
          total_loss: 0.06885081836510272
          vf_explained_var: 0.9831401109695435
          vf_loss: 0.11107385294245822
    num_agent_steps_sampled: 1498500
    num_agent_steps_trained: 1498500
    num_steps_sampled: 1498500
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,750,20583,1498500,8.9828,14.61,4.4,100.42


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1500498
  custom_metrics: {}
  date: 2021-11-09_09-29-04
  done: false
  episode_len_mean: 100.06
  episode_media: {}
  episode_reward_max: 14.700000000000015
  episode_reward_mean: 8.895700000000017
  episode_reward_min: 3.180000000000013
  episodes_this_iter: 19
  episodes_total: 14784
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3760944156419663
          entropy_coeff: 0.009999999999999998
          kl: 0.011503674740917074
          policy_loss: -0.025973766217274326
          total_loss: 0.14077278531733015
          vf_explained_var: 0.971867024898529
          vf_loss: 0.17117899633234457
    num_agent_steps_sampled: 1500498
    num_agent_steps_trained: 1500498
    num_steps_sampled: 1500498
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,751,20608.4,1500498,8.8957,14.7,3.18,100.06


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1502496
  custom_metrics: {}
  date: 2021-11-09_09-29-29
  done: false
  episode_len_mean: 100.37
  episode_media: {}
  episode_reward_max: 14.700000000000015
  episode_reward_mean: 8.726000000000017
  episode_reward_min: 3.180000000000013
  episodes_this_iter: 21
  episodes_total: 14805
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.3419737702324277
          entropy_coeff: 0.009999999999999998
          kl: 0.013450655842873227
          policy_loss: -0.05458953281243642
          total_loss: 0.10283135394787504
          vf_explained_var: 0.9803155660629272
          vf_loss: 0.15993329080797378
    num_agent_steps_sampled: 1502496
    num_agent_steps_trained: 1502496
    num_steps_sampled: 1502496
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,752,20634,1502496,8.726,14.7,3.18,100.37


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1504494
  custom_metrics: {}
  date: 2021-11-09_09-29-54
  done: false
  episode_len_mean: 100.64
  episode_media: {}
  episode_reward_max: 14.700000000000015
  episode_reward_mean: 8.577000000000018
  episode_reward_min: 1.6500000000000279
  episodes_this_iter: 20
  episodes_total: 14825
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.810914611816406
          cur_lr: 5.000000000000001e-05
          entropy: 1.2952942581403823
          entropy_coeff: 0.009999999999999998
          kl: 0.020252566161475246
          policy_loss: -0.03257388337737038
          total_loss: 0.1344655189352731
          vf_explained_var: 0.970023512840271
          vf_loss: 0.1635692417000731
    num_agent_steps_sampled: 1504494
    num_agent_steps_trained: 1504494
    num_steps_sampled: 1504494
    num_steps_trained: 1504

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,753,20659.1,1504494,8.577,14.7,1.65,100.64


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1506492
  custom_metrics: {}
  date: 2021-11-09_09-30-18
  done: false
  episode_len_mean: 102.1
  episode_media: {}
  episode_reward_max: 14.700000000000015
  episode_reward_mean: 8.461800000000018
  episode_reward_min: 1.6500000000000279
  episodes_this_iter: 18
  episodes_total: 14843
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3490342276436942
          entropy_coeff: 0.009999999999999998
          kl: 0.011572634572735736
          policy_loss: -0.02904938023005213
          total_loss: 0.10002674632484004
          vf_explained_var: 0.9823638796806335
          vf_loss: 0.12848984184009687
    num_agent_steps_sampled: 1506492
    num_agent_steps_trained: 1506492
    num_steps_sampled: 1506492
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,754,20682.4,1506492,8.4618,14.7,1.65,102.1


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1508490
  custom_metrics: {}
  date: 2021-11-09_09-30-44
  done: false
  episode_len_mean: 102.11
  episode_media: {}
  episode_reward_max: 14.700000000000015
  episode_reward_mean: 8.452300000000019
  episode_reward_min: 1.6500000000000279
  episodes_this_iter: 20
  episodes_total: 14863
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.301761948494684
          entropy_coeff: 0.009999999999999998
          kl: 0.012528765623050424
          policy_loss: -0.06097597128578595
          total_loss: 0.12423248620082934
          vf_explained_var: 0.9773728251457214
          vf_loss: 0.18298643909039952
    num_agent_steps_sampled: 1508490
    num_agent_steps_trained: 1508490
    num_steps_sampled: 1508490
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,755,20708.4,1508490,8.4523,14.7,1.65,102.11


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1510488
  custom_metrics: {}
  date: 2021-11-09_09-31-09
  done: false
  episode_len_mean: 101.95
  episode_media: {}
  episode_reward_max: 14.590000000000016
  episode_reward_mean: 8.408400000000018
  episode_reward_min: 1.6500000000000279
  episodes_this_iter: 20
  episodes_total: 14883
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3341641238757542
          entropy_coeff: 0.009999999999999998
          kl: 0.010735460312768346
          policy_loss: -0.04067390351777985
          total_loss: 0.16443542187944765
          vf_explained_var: 0.9746527075767517
          vf_loss: 0.20539265182756242
    num_agent_steps_sampled: 1510488
    num_agent_steps_trained: 1510488
    num_steps_sampled: 1510488
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,756,20734,1510488,8.4084,14.59,1.65,101.95


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1512486
  custom_metrics: {}
  date: 2021-11-09_09-31-35
  done: false
  episode_len_mean: 101.46
  episode_media: {}
  episode_reward_max: 14.590000000000016
  episode_reward_mean: 8.260900000000019
  episode_reward_min: 1.6500000000000279
  episodes_this_iter: 21
  episodes_total: 14904
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3270491282145183
          entropy_coeff: 0.009999999999999998
          kl: 0.007279933889752262
          policy_loss: -0.08036397564269247
          total_loss: 0.014115040021992865
          vf_explained_var: 0.9825208187103271
          vf_loss: 0.09889439995444957
    num_agent_steps_sampled: 1512486
    num_agent_steps_trained: 1512486
    num_steps_sampled: 1512486
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,757,20759.5,1512486,8.2609,14.59,1.65,101.46


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1514484
  custom_metrics: {}
  date: 2021-11-09_09-31-59
  done: false
  episode_len_mean: 101.68
  episode_media: {}
  episode_reward_max: 14.490000000000016
  episode_reward_mean: 8.29860000000002
  episode_reward_min: 2.9100000000000126
  episodes_this_iter: 18
  episodes_total: 14922
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3859994235492887
          entropy_coeff: 0.009999999999999998
          kl: 0.009072487155523367
          policy_loss: -0.0624593772703693
          total_loss: 0.06332084487768866
          vf_explained_var: 0.974980354309082
          vf_loss: 0.12860469685069154
    num_agent_steps_sampled: 1514484
    num_agent_steps_trained: 1514484
    num_steps_sampled: 1514484
    num_steps_trained: 151

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,758,20783.5,1514484,8.2986,14.49,2.91,101.68


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1516482
  custom_metrics: {}
  date: 2021-11-09_09-32-23
  done: false
  episode_len_mean: 102.14
  episode_media: {}
  episode_reward_max: 14.490000000000016
  episode_reward_mean: 8.201800000000016
  episode_reward_min: 2.9100000000000126
  episodes_this_iter: 19
  episodes_total: 14941
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.376065905321212
          entropy_coeff: 0.009999999999999998
          kl: 0.008076899438439323
          policy_loss: -0.010227163100526446
          total_loss: 0.09144182606112389
          vf_explained_var: 0.9789856672286987
          vf_loss: 0.10560513725060793
    num_agent_steps_sampled: 1516482
    num_agent_steps_trained: 1516482
    num_steps_sampled: 1516482
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,759,20807.3,1516482,8.2018,14.49,2.91,102.14




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1518480
  custom_metrics: {}
  date: 2021-11-09_09-33-06
  done: false
  episode_len_mean: 100.99
  episode_media: {}
  episode_reward_max: 14.720000000000017
  episode_reward_mean: 8.407700000000018
  episode_reward_min: -0.07
  episodes_this_iter: 21
  episodes_total: 14962
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3435429170018152
          entropy_coeff: 0.009999999999999998
          kl: 0.009050339621185528
          policy_loss: -0.038155263697817215
          total_loss: 0.3968169504599202
          vf_explained_var: 0.9591821432113647
          vf_loss: 0.4373990592325018
    num_agent_steps_sampled: 1518480
    num_agent_steps_trained: 1518480
    num_steps_sampled: 1518480
    num_steps_trained: 1518480
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,760,20850.4,1518480,8.4077,14.72,-0.07,100.99




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1520478
  custom_metrics: {}
  date: 2021-11-09_09-33-46
  done: false
  episode_len_mean: 101.56
  episode_media: {}
  episode_reward_max: 14.720000000000017
  episode_reward_mean: 8.642900000000019
  episode_reward_min: -0.07
  episodes_this_iter: 19
  episodes_total: 14981
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3522028690292722
          entropy_coeff: 0.009999999999999998
          kl: 0.007253134046017063
          policy_loss: -0.05356485584662074
          total_loss: 0.06746525230507056
          vf_explained_var: 0.9817623496055603
          vf_loss: 0.12572962916677907
    num_agent_steps_sampled: 1520478
    num_agent_steps_trained: 1520478
    num_steps_sampled: 1520478
    num_steps_trained: 1520478
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,761,20890,1520478,8.6429,14.72,-0.07,101.56




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1522476
  custom_metrics: {}
  date: 2021-11-09_09-34-27
  done: false
  episode_len_mean: 101.78
  episode_media: {}
  episode_reward_max: 14.720000000000017
  episode_reward_mean: 8.786400000000018
  episode_reward_min: -0.07
  episodes_this_iter: 21
  episodes_total: 15002
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.295983304296221
          entropy_coeff: 0.009999999999999998
          kl: 0.010543704275471622
          policy_loss: 0.004936846664973668
          total_loss: 0.16584080615568728
          vf_explained_var: 0.9701365828514099
          vf_loss: 0.16103872511358489
    num_agent_steps_sampled: 1522476
    num_agent_steps_trained: 1522476
    num_steps_sampled: 1522476
    num_steps_trained: 1522476
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,762,20931,1522476,8.7864,14.72,-0.07,101.78


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1524474
  custom_metrics: {}
  date: 2021-11-09_09-34-50
  done: false
  episode_len_mean: 102.38
  episode_media: {}
  episode_reward_max: 14.720000000000017
  episode_reward_mean: 8.83960000000002
  episode_reward_min: -0.07
  episodes_this_iter: 18
  episodes_total: 15020
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.446981015659514
          entropy_coeff: 0.009999999999999998
          kl: 0.00754916692890497
          policy_loss: -0.06977074025642305
          total_loss: 0.09614415480977014
          vf_explained_var: 0.974113404750824
          vf_loss: 0.17120211198925972
    num_agent_steps_sampled: 1524474
    num_agent_steps_trained: 1524474
    num_steps_sampled: 1524474
    num_steps_trained: 1524474
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,763,20954.5,1524474,8.8396,14.72,-0.07,102.38


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1526472
  custom_metrics: {}
  date: 2021-11-09_09-35-15
  done: false
  episode_len_mean: 100.74
  episode_media: {}
  episode_reward_max: 14.720000000000017
  episode_reward_mean: 9.275300000000017
  episode_reward_min: -0.07
  episodes_this_iter: 21
  episodes_total: 15041
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3751984136445181
          entropy_coeff: 0.009999999999999998
          kl: 0.0081332013395732
          policy_loss: -0.04871730088655438
          total_loss: 0.05310261943155811
          vf_explained_var: 0.9829019904136658
          vf_loss: 0.10567890795923415
    num_agent_steps_sampled: 1526472
    num_agent_steps_trained: 1526472
    num_steps_sampled: 1526472
    num_steps_trained: 1526472
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,764,20979.2,1526472,9.2753,14.72,-0.07,100.74


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1528470
  custom_metrics: {}
  date: 2021-11-09_09-35-39
  done: false
  episode_len_mean: 103.03
  episode_media: {}
  episode_reward_max: 14.720000000000017
  episode_reward_mean: 9.149400000000016
  episode_reward_min: 2.780000000000017
  episodes_this_iter: 19
  episodes_total: 15060
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4183958842640831
          entropy_coeff: 0.009999999999999998
          kl: 0.008690365839758615
          policy_loss: -0.07002579749872287
          total_loss: 0.07121065699805816
          vf_explained_var: 0.9772526621818542
          vf_loss: 0.14484969741176992
    num_agent_steps_sampled: 1528470
    num_agent_steps_trained: 1528470
    num_steps_sampled: 1528470
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,765,21003.4,1528470,9.1494,14.72,2.78,103.03


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1530468
  custom_metrics: {}
  date: 2021-11-09_09-36-05
  done: false
  episode_len_mean: 102.82
  episode_media: {}
  episode_reward_max: 14.600000000000016
  episode_reward_mean: 8.55650000000002
  episode_reward_min: 2.780000000000017
  episodes_this_iter: 17
  episodes_total: 15077
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.5302906405358088
          entropy_coeff: 0.009999999999999998
          kl: 0.008199174395047552
          policy_loss: -0.056169701048306056
          total_loss: 0.05740644193830944
          vf_explained_var: 0.9620155692100525
          vf_loss: 0.11890580423531077
    num_agent_steps_sampled: 1530468
    num_agent_steps_trained: 1530468
    num_steps_sampled: 1530468
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,766,21028.9,1530468,8.5565,14.6,2.78,102.82


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1532466
  custom_metrics: {}
  date: 2021-11-09_09-36-29
  done: false
  episode_len_mean: 104.85
  episode_media: {}
  episode_reward_max: 14.600000000000016
  episode_reward_mean: 8.328200000000018
  episode_reward_min: 2.8500000000000165
  episodes_this_iter: 20
  episodes_total: 15097
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4260697376160394
          entropy_coeff: 0.009999999999999998
          kl: 0.01031541087185494
          policy_loss: -0.06240008655225947
          total_loss: 0.12778800645222266
          vf_explained_var: 0.972048819065094
          vf_loss: 0.19190141124029955
    num_agent_steps_sampled: 1532466
    num_agent_steps_trained: 1532466
    num_steps_sampled: 1532466
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,767,21052.6,1532466,8.3282,14.6,2.85,104.85


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1534464
  custom_metrics: {}
  date: 2021-11-09_09-36-52
  done: false
  episode_len_mean: 105.53
  episode_media: {}
  episode_reward_max: 14.600000000000016
  episode_reward_mean: 8.085300000000018
  episode_reward_min: 2.8500000000000165
  episodes_this_iter: 18
  episodes_total: 15115
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4185200691223145
          entropy_coeff: 0.009999999999999998
          kl: 0.00802315607770619
          policy_loss: -0.04360355429706119
          total_loss: 0.030063048448591007
          vf_explained_var: 0.9789707660675049
          vf_loss: 0.0780926621385983
    num_agent_steps_sampled: 1534464
    num_agent_steps_trained: 1534464
    num_steps_sampled: 1534464
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,768,21076.1,1534464,8.0853,14.6,2.85,105.53


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1536462
  custom_metrics: {}
  date: 2021-11-09_09-37-16
  done: false
  episode_len_mean: 106.09
  episode_media: {}
  episode_reward_max: 14.600000000000016
  episode_reward_mean: 7.773400000000017
  episode_reward_min: 2.8500000000000165
  episodes_this_iter: 19
  episodes_total: 15134
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.473924081666129
          entropy_coeff: 0.009999999999999998
          kl: 0.009033015069768583
          policy_loss: 0.0037231260821932836
          total_loss: 0.1636185457486482
          vf_explained_var: 0.969031035900116
          vf_loss: 0.16364715567656926
    num_agent_steps_sampled: 1536462
    num_agent_steps_trained: 1536462
    num_steps_sampled: 1536462
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,769,21100,1536462,7.7734,14.6,2.85,106.09


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1538460
  custom_metrics: {}
  date: 2021-11-09_09-37-39
  done: false
  episode_len_mean: 107.9
  episode_media: {}
  episode_reward_max: 14.420000000000018
  episode_reward_mean: 7.55980000000002
  episode_reward_min: 2.8500000000000165
  episodes_this_iter: 17
  episodes_total: 15151
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4176537298020861
          entropy_coeff: 0.009999999999999998
          kl: 0.010291171571778468
          policy_loss: -0.07030648803781896
          total_loss: 0.08487040808069564
          vf_explained_var: 0.9734088778495789
          vf_loss: 0.15683554187417031
    num_agent_steps_sampled: 1538460
    num_agent_steps_trained: 1538460
    num_steps_sampled: 1538460
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,770,21123.2,1538460,7.5598,14.42,2.85,107.9


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1540458
  custom_metrics: {}
  date: 2021-11-09_09-38-04
  done: false
  episode_len_mean: 108.31
  episode_media: {}
  episode_reward_max: 14.430000000000017
  episode_reward_mean: 7.809000000000019
  episode_reward_min: 2.3300000000000116
  episodes_this_iter: 19
  episodes_total: 15170
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4195697733334132
          entropy_coeff: 0.009999999999999998
          kl: 0.014802501119834706
          policy_loss: -0.014700900124652045
          total_loss: 0.18017939668858335
          vf_explained_var: 0.9681430459022522
          vf_loss: 0.19107065026958783
    num_agent_steps_sampled: 1540458
    num_agent_steps_trained: 1540458
    num_steps_sampled: 1540458
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,771,21148,1540458,7.809,14.43,2.33,108.31


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1542456
  custom_metrics: {}
  date: 2021-11-09_09-38-30
  done: false
  episode_len_mean: 107.21
  episode_media: {}
  episode_reward_max: 14.430000000000017
  episode_reward_mean: 7.9421000000000195
  episode_reward_min: 2.3300000000000116
  episodes_this_iter: 20
  episodes_total: 15190
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.2686521388235545
          entropy_coeff: 0.009999999999999998
          kl: 0.007104860357786664
          policy_loss: -0.06762673793626683
          total_loss: 0.015546484433469319
          vf_explained_var: 0.9819949269294739
          vf_loss: 0.08721759048778387
    num_agent_steps_sampled: 1542456
    num_agent_steps_trained: 1542456
    num_steps_sampled: 1542456
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,772,21174,1542456,7.9421,14.43,2.33,107.21


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1544454
  custom_metrics: {}
  date: 2021-11-09_09-38-54
  done: false
  episode_len_mean: 106.99
  episode_media: {}
  episode_reward_max: 14.490000000000016
  episode_reward_mean: 8.278800000000018
  episode_reward_min: 2.3300000000000116
  episodes_this_iter: 18
  episodes_total: 15208
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3700681601251874
          entropy_coeff: 0.009999999999999998
          kl: 0.012090053156661078
          policy_loss: -0.07664689052672613
          total_loss: 0.151424932391161
          vf_explained_var: 0.9537420868873596
          vf_loss: 0.22706650502624967
    num_agent_steps_sampled: 1544454
    num_agent_steps_trained: 1544454
    num_steps_sampled: 1544454
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,773,21197.8,1544454,8.2788,14.49,2.33,106.99


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1546452
  custom_metrics: {}
  date: 2021-11-09_09-39-18
  done: false
  episode_len_mean: 107.07
  episode_media: {}
  episode_reward_max: 14.490000000000016
  episode_reward_mean: 8.383400000000018
  episode_reward_min: 2.3300000000000116
  episodes_this_iter: 18
  episodes_total: 15226
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4630757678122748
          entropy_coeff: 0.009999999999999998
          kl: 0.007258871425318112
          policy_loss: -0.041490176027374606
          total_loss: 0.04091547681462197
          vf_explained_var: 0.9808811545372009
          vf_loss: 0.08820692304344405
    num_agent_steps_sampled: 1546452
    num_agent_steps_trained: 1546452
    num_steps_sampled: 1546452
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,774,21221.9,1546452,8.3834,14.49,2.33,107.07


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1548450
  custom_metrics: {}
  date: 2021-11-09_09-39-43
  done: false
  episode_len_mean: 105.34
  episode_media: {}
  episode_reward_max: 14.640000000000015
  episode_reward_mean: 8.484100000000018
  episode_reward_min: 2.3300000000000116
  episodes_this_iter: 20
  episodes_total: 15246
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.423000446955363
          entropy_coeff: 0.009999999999999998
          kl: 0.007020768182973958
          policy_loss: -0.03610693906389532
          total_loss: 0.08501919547007197
          vf_explained_var: 0.9794930815696716
          vf_loss: 0.1268162743144092
    num_agent_steps_sampled: 1548450
    num_agent_steps_trained: 1548450
    num_steps_sampled: 1548450
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,775,21246.8,1548450,8.4841,14.64,2.33,105.34


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1550448
  custom_metrics: {}
  date: 2021-11-09_09-40-08
  done: false
  episode_len_mean: 105.92
  episode_media: {}
  episode_reward_max: 14.640000000000015
  episode_reward_mean: 8.175600000000019
  episode_reward_min: 2.3300000000000116
  episodes_this_iter: 19
  episodes_total: 15265
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4084159209614708
          entropy_coeff: 0.009999999999999998
          kl: 0.010889766900296343
          policy_loss: 0.013271900230929966
          total_loss: 0.21727421752044132
          vf_explained_var: 0.960392415523529
          vf_loss: 0.20484046875720932
    num_agent_steps_sampled: 1550448
    num_agent_steps_trained: 1550448
    num_steps_sampled: 1550448
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,776,21271.5,1550448,8.1756,14.64,2.33,105.92


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1552446
  custom_metrics: {}
  date: 2021-11-09_09-40-32
  done: false
  episode_len_mean: 106.48
  episode_media: {}
  episode_reward_max: 14.640000000000015
  episode_reward_mean: 8.244500000000018
  episode_reward_min: 0.5100000000000093
  episodes_this_iter: 18
  episodes_total: 15283
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.386458672795977
          entropy_coeff: 0.009999999999999998
          kl: 0.01283962815845628
          policy_loss: -0.06083663994712489
          total_loss: 0.17467579152435064
          vf_explained_var: 0.9531303644180298
          vf_loss: 0.23375925279798962
    num_agent_steps_sampled: 1552446
    num_agent_steps_trained: 1552446
    num_steps_sampled: 1552446
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,777,21295.7,1552446,8.2445,14.64,0.51,106.48


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1554444
  custom_metrics: {}
  date: 2021-11-09_09-40-56
  done: false
  episode_len_mean: 107.51
  episode_media: {}
  episode_reward_max: 14.640000000000015
  episode_reward_mean: 8.076300000000018
  episode_reward_min: 0.5100000000000093
  episodes_this_iter: 19
  episodes_total: 15302
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4648079230671838
          entropy_coeff: 0.009999999999999998
          kl: 0.016078046153503965
          policy_loss: -0.03215121259646756
          total_loss: 0.3685498942665401
          vf_explained_var: 0.9392228126525879
          vf_loss: 0.39579230028958545
    num_agent_steps_sampled: 1554444
    num_agent_steps_trained: 1554444
    num_steps_sampled: 1554444
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,778,21319.5,1554444,8.0763,14.64,0.51,107.51




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1556442
  custom_metrics: {}
  date: 2021-11-09_09-41-50
  done: false
  episode_len_mean: 106.07
  episode_media: {}
  episode_reward_max: 14.640000000000015
  episode_reward_mean: 7.739200000000019
  episode_reward_min: -0.07
  episodes_this_iter: 19
  episodes_total: 15321
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.5063958951405116
          entropy_coeff: 0.009999999999999998
          kl: 0.008841582871274343
          policy_loss: -0.035649309307336806
          total_loss: 0.21793146931699345
          vf_explained_var: 0.9293957948684692
          vf_loss: 0.25789008584050904
    num_agent_steps_sampled: 1556442
    num_agent_steps_trained: 1556442
    num_steps_sampled: 1556442
    num_steps_trained: 1556442
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,779,21373.6,1556442,7.7392,14.64,-0.07,106.07


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1558440
  custom_metrics: {}
  date: 2021-11-09_09-42-14
  done: false
  episode_len_mean: 106.63
  episode_media: {}
  episode_reward_max: 14.640000000000015
  episode_reward_mean: 7.914000000000017
  episode_reward_min: -0.07
  episodes_this_iter: 18
  episodes_total: 15339
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.343229223432995
          entropy_coeff: 0.009999999999999998
          kl: 0.009716422363716665
          policy_loss: -0.05848057202640034
          total_loss: 0.08399235230116617
          vf_explained_var: 0.9732779264450073
          vf_loss: 0.14408643442605223
    num_agent_steps_sampled: 1558440
    num_agent_steps_trained: 1558440
    num_steps_sampled: 1558440
    num_steps_trained: 1558440
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,780,21397.7,1558440,7.914,14.64,-0.07,106.63




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1560438
  custom_metrics: {}
  date: 2021-11-09_09-42-53
  done: false
  episode_len_mean: 106.39
  episode_media: {}
  episode_reward_max: 14.510000000000018
  episode_reward_mean: 8.095800000000018
  episode_reward_min: -0.07
  episodes_this_iter: 19
  episodes_total: 15358
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3738634938285463
          entropy_coeff: 0.009999999999999998
          kl: 0.010823603858361333
          policy_loss: -0.05181260486798627
          total_loss: 0.37264308201237806
          vf_explained_var: 0.9483891725540161
          vf_loss: 0.4250287941346566
    num_agent_steps_sampled: 1560438
    num_agent_steps_trained: 1560438
    num_steps_sampled: 1560438
    num_steps_trained: 1560438
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,781,21436.5,1560438,8.0958,14.51,-0.07,106.39


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1562436
  custom_metrics: {}
  date: 2021-11-09_09-43-16
  done: false
  episode_len_mean: 106.21
  episode_media: {}
  episode_reward_max: 14.510000000000018
  episode_reward_mean: 8.503900000000018
  episode_reward_min: -0.07
  episodes_this_iter: 19
  episodes_total: 15377
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.357987167721703
          entropy_coeff: 0.009999999999999998
          kl: 0.00998051973739714
          policy_loss: -0.06781209309895833
          total_loss: 0.12685884371222483
          vf_explained_var: 0.9782810807228088
          vf_loss: 0.19611078193854717
    num_agent_steps_sampled: 1562436
    num_agent_steps_trained: 1562436
    num_steps_sampled: 1562436
    num_steps_trained: 1562436
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,782,21460,1562436,8.5039,14.51,-0.07,106.21


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1564434
  custom_metrics: {}
  date: 2021-11-09_09-43-39
  done: false
  episode_len_mean: 106.79
  episode_media: {}
  episode_reward_max: 14.510000000000018
  episode_reward_mean: 8.512500000000017
  episode_reward_min: -0.07
  episodes_this_iter: 17
  episodes_total: 15394
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4705533192271278
          entropy_coeff: 0.009999999999999998
          kl: 0.009161799951674815
          policy_loss: -0.022177232659998394
          total_loss: 0.10955650301738865
          vf_explained_var: 0.9591698050498962
          vf_loss: 0.13529511280357837
    num_agent_steps_sampled: 1564434
    num_agent_steps_trained: 1564434
    num_steps_sampled: 1564434
    num_steps_trained: 1564434
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,783,21482.2,1564434,8.5125,14.51,-0.07,106.79


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1566432
  custom_metrics: {}
  date: 2021-11-09_09-44-02
  done: false
  episode_len_mean: 108.11
  episode_media: {}
  episode_reward_max: 14.510000000000018
  episode_reward_mean: 8.67350000000002
  episode_reward_min: -0.05
  episodes_this_iter: 19
  episodes_total: 15413
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3749867564155942
          entropy_coeff: 0.009999999999999998
          kl: 0.008432337175384818
          policy_loss: -0.015429498308471271
          total_loss: 0.14463084081986122
          vf_explained_var: 0.9631080031394958
          vf_loss: 0.1635533487335557
    num_agent_steps_sampled: 1566432
    num_agent_steps_trained: 1566432
    num_steps_sampled: 1566432
    num_steps_trained: 1566432
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,784,21505.8,1566432,8.6735,14.51,-0.05,108.11


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1568430
  custom_metrics: {}
  date: 2021-11-09_09-44-27
  done: false
  episode_len_mean: 107.78
  episode_media: {}
  episode_reward_max: 14.510000000000018
  episode_reward_mean: 8.698400000000019
  episode_reward_min: -0.05
  episodes_this_iter: 19
  episodes_total: 15432
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4064187469936553
          entropy_coeff: 0.009999999999999998
          kl: 0.00937735674267497
          policy_loss: -0.028088787606074698
          total_loss: 0.28884897551588007
          vf_explained_var: 0.9498924612998962
          vf_loss: 0.31959559757794653
    num_agent_steps_sampled: 1568430
    num_agent_steps_trained: 1568430
    num_steps_sampled: 1568430
    num_steps_trained: 1568430
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,785,21530.4,1568430,8.6984,14.51,-0.05,107.78


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1570428
  custom_metrics: {}
  date: 2021-11-09_09-44-51
  done: false
  episode_len_mean: 108.9
  episode_media: {}
  episode_reward_max: 14.470000000000017
  episode_reward_mean: 8.602800000000018
  episode_reward_min: 2.5500000000000083
  episodes_this_iter: 19
  episodes_total: 15451
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3423923901149204
          entropy_coeff: 0.009999999999999998
          kl: 0.007879531997679741
          policy_loss: -0.05029438442683646
          total_loss: 0.051203963586262294
          vf_explained_var: 0.9827382564544678
          vf_loss: 0.10533782940890107
    num_agent_steps_sampled: 1570428
    num_agent_steps_trained: 1570428
    num_steps_sampled: 1570428
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,786,21554.6,1570428,8.6028,14.47,2.55,108.9


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1572426
  custom_metrics: {}
  date: 2021-11-09_09-45-16
  done: false
  episode_len_mean: 108.19
  episode_media: {}
  episode_reward_max: 14.400000000000016
  episode_reward_mean: 8.37780000000002
  episode_reward_min: 2.5500000000000083
  episodes_this_iter: 19
  episodes_total: 15470
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.405622132619222
          entropy_coeff: 0.009999999999999998
          kl: 0.009463620441557019
          policy_loss: -0.03551664779938403
          total_loss: 0.09762663567172629
          vf_explained_var: 0.9789922833442688
          vf_loss: 0.13568822415102097
    num_agent_steps_sampled: 1572426
    num_agent_steps_trained: 1572426
    num_steps_sampled: 1572426
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,787,21579.2,1572426,8.3778,14.4,2.55,108.19


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1574424
  custom_metrics: {}
  date: 2021-11-09_09-45-39
  done: false
  episode_len_mean: 107.07
  episode_media: {}
  episode_reward_max: 14.380000000000013
  episode_reward_mean: 8.695400000000017
  episode_reward_min: 2.5500000000000083
  episodes_this_iter: 18
  episodes_total: 15488
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4122321929250445
          entropy_coeff: 0.009999999999999998
          kl: 0.007660747793288673
          policy_loss: -0.04282119061265673
          total_loss: 0.033009111872386365
          vf_explained_var: 0.9909237623214722
          vf_loss: 0.08063430720496745
    num_agent_steps_sampled: 1574424
    num_agent_steps_trained: 1574424
    num_steps_sampled: 1574424
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,788,21602.9,1574424,8.6954,14.38,2.55,107.07


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1576422
  custom_metrics: {}
  date: 2021-11-09_09-46-04
  done: false
  episode_len_mean: 106.29
  episode_media: {}
  episode_reward_max: 14.380000000000013
  episode_reward_mean: 8.72490000000002
  episode_reward_min: 2.5500000000000083
  episodes_this_iter: 20
  episodes_total: 15508
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4389271912120638
          entropy_coeff: 0.009999999999999998
          kl: 0.006347783989866549
          policy_loss: -0.003081551016796203
          total_loss: 0.07925814931236562
          vf_explained_var: 0.9791468977928162
          vf_loss: 0.08900770301974956
    num_agent_steps_sampled: 1576422
    num_agent_steps_trained: 1576422
    num_steps_sampled: 1576422
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,789,21626.9,1576422,8.7249,14.38,2.55,106.29


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1578420
  custom_metrics: {}
  date: 2021-11-09_09-46-28
  done: false
  episode_len_mean: 106.36
  episode_media: {}
  episode_reward_max: 14.390000000000017
  episode_reward_mean: 9.01810000000002
  episode_reward_min: 2.5500000000000083
  episodes_this_iter: 18
  episodes_total: 15526
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3922534925597054
          entropy_coeff: 0.009999999999999998
          kl: 0.009498482193040891
          policy_loss: -0.04770283769993555
          total_loss: 0.10533243918880111
          vf_explained_var: 0.9785324931144714
          vf_loss: 0.1554041259345554
    num_agent_steps_sampled: 1578420
    num_agent_steps_trained: 1578420
    num_steps_sampled: 1578420
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,790,21650.9,1578420,9.0181,14.39,2.55,106.36


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1580418
  custom_metrics: {}
  date: 2021-11-09_09-46-52
  done: false
  episode_len_mean: 107.03
  episode_media: {}
  episode_reward_max: 14.390000000000017
  episode_reward_mean: 9.025500000000019
  episode_reward_min: 2.7200000000000215
  episodes_this_iter: 18
  episodes_total: 15544
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4245651421092806
          entropy_coeff: 0.009999999999999998
          kl: 0.007597952266447879
          policy_loss: -0.048067301387588184
          total_loss: 0.022155585929396607
          vf_explained_var: 0.9848585724830627
          vf_loss: 0.07522660312021062
    num_agent_steps_sampled: 1580418
    num_agent_steps_trained: 1580418
    num_steps_sampled: 1580418
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,791,21674.9,1580418,9.0255,14.39,2.72,107.03


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1582416
  custom_metrics: {}
  date: 2021-11-09_09-47-16
  done: false
  episode_len_mean: 105.93
  episode_media: {}
  episode_reward_max: 14.390000000000017
  episode_reward_mean: 9.00800000000002
  episode_reward_min: -0.9500000000000007
  episodes_this_iter: 20
  episodes_total: 15564
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.35453940090679
          entropy_coeff: 0.009999999999999998
          kl: 0.012668774639496354
          policy_loss: -0.02844721328999315
          total_loss: 0.2721462094712825
          vf_explained_var: 0.9643126130104065
          vf_loss: 0.29872887758981614
    num_agent_steps_sampled: 1582416
    num_agent_steps_trained: 1582416
    num_steps_sampled: 1582416
    num_steps_trained: 158

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,792,21699.8,1582416,9.008,14.39,-0.95,105.93


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1584414
  custom_metrics: {}
  date: 2021-11-09_09-47-41
  done: false
  episode_len_mean: 104.83
  episode_media: {}
  episode_reward_max: 14.390000000000017
  episode_reward_mean: 8.961800000000018
  episode_reward_min: -0.9500000000000007
  episodes_this_iter: 19
  episodes_total: 15583
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3562438539096286
          entropy_coeff: 0.009999999999999998
          kl: 0.008103302841176706
          policy_loss: -0.0425676155125811
          total_loss: 0.03562439900955983
          vf_explained_var: 0.9886753559112549
          vf_loss: 0.08189782429309118
    num_agent_steps_sampled: 1584414
    num_agent_steps_trained: 1584414
    num_steps_sampled: 1584414
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,793,21724.2,1584414,8.9618,14.39,-0.95,104.83


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1586412
  custom_metrics: {}
  date: 2021-11-09_09-48-05
  done: false
  episode_len_mean: 104.9
  episode_media: {}
  episode_reward_max: 14.550000000000017
  episode_reward_mean: 8.942400000000019
  episode_reward_min: -0.9500000000000007
  episodes_this_iter: 19
  episodes_total: 15602
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3801090115592594
          entropy_coeff: 0.009999999999999998
          kl: 0.010471115782131026
          policy_loss: -0.03278036931795733
          total_loss: 0.1431515704663027
          vf_explained_var: 0.9760290384292603
          vf_loss: 0.17699625472582523
    num_agent_steps_sampled: 1586412
    num_agent_steps_trained: 1586412
    num_steps_sampled: 1586412
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,794,21748.4,1586412,8.9424,14.55,-0.95,104.9


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1588410
  custom_metrics: {}
  date: 2021-11-09_09-48-29
  done: false
  episode_len_mean: 104.9
  episode_media: {}
  episode_reward_max: 14.550000000000017
  episode_reward_mean: 8.675100000000018
  episode_reward_min: -0.9500000000000007
  episodes_this_iter: 19
  episodes_total: 15621
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.46319918405442
          entropy_coeff: 0.009999999999999998
          kl: 0.008596446798079426
          policy_loss: -0.06941911092116719
          total_loss: 0.057061701374394556
          vf_explained_var: 0.9778438806533813
          vf_loss: 0.1306563286376851
    num_agent_steps_sampled: 1588410
    num_agent_steps_trained: 1588410
    num_steps_sampled: 1588410
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,795,21772.3,1588410,8.6751,14.55,-0.95,104.9


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1590408
  custom_metrics: {}
  date: 2021-11-09_09-48-54
  done: false
  episode_len_mean: 103.94
  episode_media: {}
  episode_reward_max: 14.550000000000017
  episode_reward_mean: 8.735500000000018
  episode_reward_min: -0.9500000000000007
  episodes_this_iter: 19
  episodes_total: 15640
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4148144727661496
          entropy_coeff: 0.009999999999999998
          kl: 0.007982502408667484
          policy_loss: -0.053071493016822
          total_loss: 0.009075194668202172
          vf_explained_var: 0.9885252714157104
          vf_loss: 0.06658514169532628
    num_agent_steps_sampled: 1590408
    num_agent_steps_trained: 1590408
    num_steps_sampled: 1590408
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,796,21796.7,1590408,8.7355,14.55,-0.95,103.94


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1592406
  custom_metrics: {}
  date: 2021-11-09_09-49-19
  done: false
  episode_len_mean: 104.73
  episode_media: {}
  episode_reward_max: 14.550000000000017
  episode_reward_mean: 8.811000000000021
  episode_reward_min: 3.8600000000000203
  episodes_this_iter: 19
  episodes_total: 15659
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4577801233246213
          entropy_coeff: 0.009999999999999998
          kl: 0.01023328991678829
          policy_loss: -0.02623872756071034
          total_loss: 0.09571247130100216
          vf_explained_var: 0.9829581379890442
          vf_loss: 0.12408151464270693
    num_agent_steps_sampled: 1592406
    num_agent_steps_trained: 1592406
    num_steps_sampled: 1592406
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,797,21821.7,1592406,8.811,14.55,3.86,104.73




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1594404
  custom_metrics: {}
  date: 2021-11-09_09-50-14
  done: false
  episode_len_mean: 103.75
  episode_media: {}
  episode_reward_max: 14.550000000000017
  episode_reward_mean: 8.588700000000017
  episode_reward_min: -0.03
  episodes_this_iter: 21
  episodes_total: 15680
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3916197231837681
          entropy_coeff: 0.009999999999999998
          kl: 0.00984210499619018
          policy_loss: -0.04102796195518403
          total_loss: 0.12716502151673748
          vf_explained_var: 0.9798147082328796
          vf_loss: 0.17013751837824073
    num_agent_steps_sampled: 1594404
    num_agent_steps_trained: 1594404
    num_steps_sampled: 1594404
    num_steps_trained: 1594404
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,798,21877.2,1594404,8.5887,14.55,-0.03,103.75




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1596402
  custom_metrics: {}
  date: 2021-11-09_09-50-54
  done: false
  episode_len_mean: 102.65
  episode_media: {}
  episode_reward_max: 14.410000000000018
  episode_reward_mean: 8.574800000000018
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 15699
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4519960727010455
          entropy_coeff: 0.009999999999999998
          kl: 0.009675914194247083
          policy_loss: -0.03569229646098046
          total_loss: 0.3624910578974301
          vf_explained_var: 0.9499946236610413
          vf_loss: 0.4009337987218584
    num_agent_steps_sampled: 1596402
    num_agent_steps_trained: 1596402
    num_steps_sampled: 1596402
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,799,21916.7,1596402,8.5748,14.41,-0.06,102.65


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1598400
  custom_metrics: {}
  date: 2021-11-09_09-51-19
  done: false
  episode_len_mean: 103.01
  episode_media: {}
  episode_reward_max: 14.430000000000016
  episode_reward_mean: 8.884400000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 15718
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.400562254020146
          entropy_coeff: 0.009999999999999998
          kl: 0.0077682554824936596
          policy_loss: -0.09330821328219913
          total_loss: -0.011202858876259554
          vf_explained_var: 0.9893081188201904
          vf_loss: 0.08666189059260346
    num_agent_steps_sampled: 1598400
    num_agent_steps_trained: 1598400
    num_steps_sampled: 1598400
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,800,21941.8,1598400,8.8844,14.43,-0.06,103.01


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1600398
  custom_metrics: {}
  date: 2021-11-09_09-51-44
  done: false
  episode_len_mean: 103.65
  episode_media: {}
  episode_reward_max: 14.430000000000016
  episode_reward_mean: 8.775500000000019
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 18
  episodes_total: 15736
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.427791464328766
          entropy_coeff: 0.009999999999999998
          kl: 0.011133701719940195
          policy_loss: -0.07044526966554777
          total_loss: 0.07977577481596243
          vf_explained_var: 0.975238025188446
          vf_loss: 0.15095623378597556
    num_agent_steps_sampled: 1600398
    num_agent_steps_trained: 1600398
    num_steps_sampled: 1600398
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,801,21966.5,1600398,8.7755,14.43,-0.06,103.65


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1602396
  custom_metrics: {}
  date: 2021-11-09_09-52-08
  done: false
  episode_len_mean: 104.12
  episode_media: {}
  episode_reward_max: 14.430000000000016
  episode_reward_mean: 8.776100000000019
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 15755
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4914989800680252
          entropy_coeff: 0.009999999999999998
          kl: 0.011277600505665068
          policy_loss: -0.03987844435586816
          total_loss: 0.1007592525333166
          vf_explained_var: 0.9759935736656189
          vf_loss: 0.14183493068530445
    num_agent_steps_sampled: 1602396
    num_agent_steps_trained: 1602396
    num_steps_sampled: 1602396
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,802,21990.5,1602396,8.7761,14.43,-0.06,104.12


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1604394
  custom_metrics: {}
  date: 2021-11-09_09-52-32
  done: false
  episode_len_mean: 106.04
  episode_media: {}
  episode_reward_max: 14.430000000000016
  episode_reward_mean: 8.828300000000018
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 15774
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.5025533210663569
          entropy_coeff: 0.009999999999999998
          kl: 0.008765449263420917
          policy_loss: -0.025794495739752336
          total_loss: 0.05494457832759335
          vf_explained_var: 0.9805922508239746
          vf_loss: 0.0851025598212367
    num_agent_steps_sampled: 1604394
    num_agent_steps_trained: 1604394
    num_steps_sampled: 1604394
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,803,22014.8,1604394,8.8283,14.43,-0.06,106.04


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1606392
  custom_metrics: {}
  date: 2021-11-09_09-52-57
  done: false
  episode_len_mean: 106.89
  episode_media: {}
  episode_reward_max: 14.470000000000017
  episode_reward_mean: 9.102400000000017
  episode_reward_min: 3.3400000000000074
  episodes_this_iter: 19
  episodes_total: 15793
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3516904967171806
          entropy_coeff: 0.009999999999999998
          kl: 0.007794625192183042
          policy_loss: -0.059440128930977415
          total_loss: -0.013506880633178212
          vf_explained_var: 0.9946184158325195
          vf_loss: 0.04996898841290247
    num_agent_steps_sampled: 1606392
    num_agent_steps_trained: 1606392
    num_steps_sampled: 1606392
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,804,22039.7,1606392,9.1024,14.47,3.34,106.89


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1608390
  custom_metrics: {}
  date: 2021-11-09_09-53-20
  done: false
  episode_len_mean: 106.67
  episode_media: {}
  episode_reward_max: 14.470000000000017
  episode_reward_mean: 8.97420000000002
  episode_reward_min: 3.3400000000000074
  episodes_this_iter: 18
  episodes_total: 15811
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4180253255934943
          entropy_coeff: 0.009999999999999998
          kl: 0.008892303653711874
          policy_loss: -0.04154622045656045
          total_loss: 0.08078336389291854
          vf_explained_var: 0.9777621030807495
          vf_loss: 0.12569348721631934
    num_agent_steps_sampled: 1608390
    num_agent_steps_trained: 1608390
    num_steps_sampled: 1608390
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,805,22062.6,1608390,8.9742,14.47,3.34,106.67


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1610388
  custom_metrics: {}
  date: 2021-11-09_09-53-43
  done: false
  episode_len_mean: 107.07
  episode_media: {}
  episode_reward_max: 14.470000000000017
  episode_reward_mean: 8.89280000000002
  episode_reward_min: 3.3400000000000074
  episodes_this_iter: 18
  episodes_total: 15829
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4664179171834673
          entropy_coeff: 0.009999999999999998
          kl: 0.011923139198207318
          policy_loss: -0.05730473856840815
          total_loss: 0.08206304255872965
          vf_explained_var: 0.9784966111183167
          vf_loss: 0.1395289895789964
    num_agent_steps_sampled: 1610388
    num_agent_steps_trained: 1610388
    num_steps_sampled: 1610388
    num_steps_trained: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,806,22086.2,1610388,8.8928,14.47,3.34,107.07


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1612386
  custom_metrics: {}
  date: 2021-11-09_09-54-08
  done: false
  episode_len_mean: 108.57
  episode_media: {}
  episode_reward_max: 14.470000000000017
  episode_reward_mean: 9.18920000000002
  episode_reward_min: 4.530000000000019
  episodes_this_iter: 18
  episodes_total: 15847
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4263930252620152
          entropy_coeff: 0.009999999999999998
          kl: 0.008536450654679752
          policy_loss: -0.021974415314339454
          total_loss: 0.09274969546213037
          vf_explained_var: 0.9806737899780273
          vf_loss: 0.11860454357450917
    num_agent_steps_sampled: 1612386
    num_agent_steps_trained: 1612386
    num_steps_sampled: 1612386
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,807,22110.5,1612386,9.1892,14.47,4.53,108.57


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1614384
  custom_metrics: {}
  date: 2021-11-09_09-54-32
  done: false
  episode_len_mean: 108.41
  episode_media: {}
  episode_reward_max: 14.580000000000016
  episode_reward_mean: 9.42010000000002
  episode_reward_min: 4.530000000000019
  episodes_this_iter: 18
  episodes_total: 15865
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4747022617430914
          entropy_coeff: 0.009999999999999998
          kl: 0.009110978256766065
          policy_loss: -0.04369945640542677
          total_loss: 0.067540368065238
          vf_explained_var: 0.9838581681251526
          vf_loss: 0.11490450878405854
    num_agent_steps_sampled: 1614384
    num_agent_steps_trained: 1614384
    num_steps_sampled: 1614384
    num_steps_trained: 1614

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,808,22134.5,1614384,9.4201,14.58,4.53,108.41


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1616382
  custom_metrics: {}
  date: 2021-11-09_09-54-56
  done: false
  episode_len_mean: 108.72
  episode_media: {}
  episode_reward_max: 14.580000000000016
  episode_reward_mean: 9.305700000000021
  episode_reward_min: 2.4600000000000186
  episodes_this_iter: 19
  episodes_total: 15884
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4430197693052746
          entropy_coeff: 0.009999999999999998
          kl: 0.008904239917523529
          policy_loss: -0.05450032956543423
          total_loss: 0.06696530821777526
          vf_explained_var: 0.9788404107093811
          vf_loss: 0.1250649672888574
    num_agent_steps_sampled: 1616382
    num_agent_steps_trained: 1616382
    num_steps_sampled: 1616382
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,809,22158.8,1616382,9.3057,14.58,2.46,108.72


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1618380
  custom_metrics: {}
  date: 2021-11-09_09-55-21
  done: false
  episode_len_mean: 108.42
  episode_media: {}
  episode_reward_max: 14.580000000000016
  episode_reward_mean: 9.27490000000002
  episode_reward_min: 2.4600000000000186
  episodes_this_iter: 19
  episodes_total: 15903
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4667568416822525
          entropy_coeff: 0.009999999999999998
          kl: 0.011363182117897897
          policy_loss: -0.018792332016995976
          total_loss: 0.15049520887079693
          vf_explained_var: 0.9765924215316772
          vf_loss: 0.17013325444644406
    num_agent_steps_sampled: 1618380
    num_agent_steps_trained: 1618380
    num_steps_sampled: 1618380
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,810,22183.8,1618380,9.2749,14.58,2.46,108.42


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1620378
  custom_metrics: {}
  date: 2021-11-09_09-55-49
  done: false
  episode_len_mean: 108.41
  episode_media: {}
  episode_reward_max: 14.590000000000016
  episode_reward_mean: 9.18670000000002
  episode_reward_min: 2.4600000000000186
  episodes_this_iter: 19
  episodes_total: 15922
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4322925533567157
          entropy_coeff: 0.009999999999999998
          kl: 0.008991412402219862
          policy_loss: -0.07435132586175487
          total_loss: 0.05529102283929076
          vf_explained_var: 0.9715009331703186
          vf_loss: 0.133028373335089
    num_agent_steps_sampled: 1620378
    num_agent_steps_trained: 1620378
    num_steps_sampled: 1620378
    num_steps_trained: 162

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,811,22212.2,1620378,9.1867,14.59,2.46,108.41


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1622376
  custom_metrics: {}
  date: 2021-11-09_09-56-14
  done: false
  episode_len_mean: 106.73
  episode_media: {}
  episode_reward_max: 14.590000000000016
  episode_reward_mean: 9.341200000000018
  episode_reward_min: 2.4600000000000186
  episodes_this_iter: 19
  episodes_total: 15941
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3187714207740058
          entropy_coeff: 0.009999999999999998
          kl: 0.009207312476377065
          policy_loss: -0.03880346811243466
          total_loss: 0.08215555240000998
          vf_explained_var: 0.9829033017158508
          vf_loss: 0.12294722016723383
    num_agent_steps_sampled: 1622376
    num_agent_steps_trained: 1622376
    num_steps_sampled: 1622376
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,812,22237.1,1622376,9.3412,14.59,2.46,106.73


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1624374
  custom_metrics: {}
  date: 2021-11-09_09-56-39
  done: false
  episode_len_mean: 106.85
  episode_media: {}
  episode_reward_max: 14.590000000000016
  episode_reward_mean: 9.199500000000018
  episode_reward_min: 2.4600000000000186
  episodes_this_iter: 18
  episodes_total: 15959
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4025768535477774
          entropy_coeff: 0.009999999999999998
          kl: 0.007077441655040791
          policy_loss: -0.09857259435313089
          total_loss: 0.008677196316421032
          vf_explained_var: 0.9775927662849426
          vf_loss: 0.11266675791924909
    num_agent_steps_sampled: 1624374
    num_agent_steps_trained: 1624374
    num_steps_sampled: 1624374
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,813,22261.5,1624374,9.1995,14.59,2.46,106.85


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1626372
  custom_metrics: {}
  date: 2021-11-09_09-57-03
  done: false
  episode_len_mean: 106.61
  episode_media: {}
  episode_reward_max: 14.590000000000016
  episode_reward_mean: 9.038300000000019
  episode_reward_min: 3.8400000000000265
  episodes_this_iter: 19
  episodes_total: 15978
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4307321701731002
          entropy_coeff: 0.009999999999999998
          kl: 0.008731259643096798
          policy_loss: -0.014163332751819066
          total_loss: 0.08022268895237218
          vf_explained_var: 0.9759594798088074
          vf_loss: 0.09807288620088782
    num_agent_steps_sampled: 1626372
    num_agent_steps_trained: 1626372
    num_steps_sampled: 1626372
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,814,22285.5,1626372,9.0383,14.59,3.84,106.61


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1628370
  custom_metrics: {}
  date: 2021-11-09_09-57-26
  done: false
  episode_len_mean: 108.08
  episode_media: {}
  episode_reward_max: 14.590000000000016
  episode_reward_mean: 8.698500000000019
  episode_reward_min: 0.43000000000001837
  episodes_this_iter: 17
  episodes_total: 15995
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4094694710913158
          entropy_coeff: 0.009999999999999998
          kl: 0.012825270867826644
          policy_loss: -0.07055078205608187
          total_loss: 0.13605198422890333
          vf_explained_var: 0.9589438438415527
          vf_loss: 0.20509716290093605
    num_agent_steps_sampled: 1628370
    num_agent_steps_trained: 1628370
    num_steps_sampled: 1628370
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,815,22308.7,1628370,8.6985,14.59,0.43,108.08


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1630368
  custom_metrics: {}
  date: 2021-11-09_09-57-51
  done: false
  episode_len_mean: 107.88
  episode_media: {}
  episode_reward_max: 14.540000000000017
  episode_reward_mean: 8.37810000000002
  episode_reward_min: 0.43000000000001837
  episodes_this_iter: 19
  episodes_total: 16014
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3800362802687145
          entropy_coeff: 0.009999999999999998
          kl: 0.01044106782133613
          policy_loss: -0.025369624474218914
          total_loss: 0.1599076929945676
          vf_explained_var: 0.971102774143219
          vf_loss: 0.18637745997735433
    num_agent_steps_sampled: 1630368
    num_agent_steps_trained: 1630368
    num_steps_sampled: 1630368
    num_steps_trained: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,816,22334,1630368,8.3781,14.54,0.43,107.88




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1632366
  custom_metrics: {}
  date: 2021-11-09_09-58-46
  done: false
  episode_len_mean: 108.09
  episode_media: {}
  episode_reward_max: 14.540000000000017
  episode_reward_mean: 8.29580000000002
  episode_reward_min: -0.02
  episodes_this_iter: 19
  episodes_total: 16033
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3195824887071337
          entropy_coeff: 0.009999999999999998
          kl: 0.015609503336533999
          policy_loss: -0.01088421813966263
          total_loss: 0.3509893502507891
          vf_explained_var: 0.9595812559127808
          vf_loss: 0.3560824308721792
    num_agent_steps_sampled: 1632366
    num_agent_steps_trained: 1632366
    num_steps_sampled: 1632366
    num_steps_trained: 1632366
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,817,22388.3,1632366,8.2958,14.54,-0.02,108.09


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1634364
  custom_metrics: {}
  date: 2021-11-09_09-59-12
  done: false
  episode_len_mean: 107.91
  episode_media: {}
  episode_reward_max: 14.540000000000017
  episode_reward_mean: 8.341800000000019
  episode_reward_min: -0.02
  episodes_this_iter: 19
  episodes_total: 16052
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4136370783760435
          entropy_coeff: 0.009999999999999998
          kl: 0.007994732209194586
          policy_loss: -0.051494778586285456
          total_loss: 0.05521023502307279
          vf_explained_var: 0.9737691283226013
          vf_loss: 0.11111681542492338
    num_agent_steps_sampled: 1634364
    num_agent_steps_trained: 1634364
    num_steps_sampled: 1634364
    num_steps_trained: 1634364
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,818,22414.3,1634364,8.3418,14.54,-0.02,107.91




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1636362
  custom_metrics: {}
  date: 2021-11-09_09-59-50
  done: false
  episode_len_mean: 106.66
  episode_media: {}
  episode_reward_max: 14.540000000000017
  episode_reward_mean: 8.383500000000017
  episode_reward_min: -0.03
  episodes_this_iter: 18
  episodes_total: 16070
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.368496356691633
          entropy_coeff: 0.009999999999999998
          kl: 0.010338177125388284
          policy_loss: -0.012396152388481867
          total_loss: 0.2150921221290316
          vf_explained_var: 0.9740027189254761
          vf_loss: 0.2285981668248063
    num_agent_steps_sampled: 1636362
    num_agent_steps_trained: 1636362
    num_steps_sampled: 1636362
    num_steps_trained: 1636362
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,819,22452.6,1636362,8.3835,14.54,-0.03,106.66


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1638360
  custom_metrics: {}
  date: 2021-11-09_10-00-14
  done: false
  episode_len_mean: 106.41
  episode_media: {}
  episode_reward_max: 14.630000000000013
  episode_reward_mean: 8.67090000000002
  episode_reward_min: -0.03
  episodes_this_iter: 20
  episodes_total: 16090
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4030587536948067
          entropy_coeff: 0.009999999999999998
          kl: 0.012031574321534186
          policy_loss: -0.03803053963042441
          total_loss: 0.17230262680067904
          vf_explained_var: 0.9697033166885376
          vf_loss: 0.20972888540653956
    num_agent_steps_sampled: 1638360
    num_agent_steps_trained: 1638360
    num_steps_sampled: 1638360
    num_steps_trained: 1638360
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,820,22476.5,1638360,8.6709,14.63,-0.03,106.41


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1640358
  custom_metrics: {}
  date: 2021-11-09_10-00-47
  done: false
  episode_len_mean: 106.89
  episode_media: {}
  episode_reward_max: 14.630000000000013
  episode_reward_mean: 8.819900000000017
  episode_reward_min: -0.03
  episodes_this_iter: 19
  episodes_total: 16109
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3380953164327711
          entropy_coeff: 0.009999999999999998
          kl: 0.011095698813231764
          policy_loss: -0.025229258374089285
          total_loss: 0.1524853133995618
          vf_explained_var: 0.9764301776885986
          vf_loss: 0.17759902656433127
    num_agent_steps_sampled: 1640358
    num_agent_steps_trained: 1640358
    num_steps_sampled: 1640358
    num_steps_trained: 1640358
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,821,22509.2,1640358,8.8199,14.63,-0.03,106.89


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1642356
  custom_metrics: {}
  date: 2021-11-09_10-01-12
  done: false
  episode_len_mean: 106.53
  episode_media: {}
  episode_reward_max: 14.630000000000013
  episode_reward_mean: 8.924200000000019
  episode_reward_min: -0.03
  episodes_this_iter: 19
  episodes_total: 16128
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3360841313997904
          entropy_coeff: 0.009999999999999998
          kl: 0.009782747730820376
          policy_loss: -0.09964706880217862
          total_loss: 0.12381848088864769
          vf_explained_var: 0.9667130708694458
          vf_loss: 0.22492693488796553
    num_agent_steps_sampled: 1642356
    num_agent_steps_trained: 1642356
    num_steps_sampled: 1642356
    num_steps_trained: 1642356
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,822,22534.1,1642356,8.9242,14.63,-0.03,106.53


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1644354
  custom_metrics: {}
  date: 2021-11-09_10-01-36
  done: false
  episode_len_mean: 104.87
  episode_media: {}
  episode_reward_max: 14.630000000000013
  episode_reward_mean: 9.018400000000018
  episode_reward_min: -0.03
  episodes_this_iter: 19
  episodes_total: 16147
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3406386647905622
          entropy_coeff: 0.009999999999999998
          kl: 0.007938331587774572
          policy_loss: -0.08980019037567434
          total_loss: 0.11221112718450882
          vf_explained_var: 0.9744575023651123
          vf_loss: 0.20576174055181798
    num_agent_steps_sampled: 1644354
    num_agent_steps_trained: 1644354
    num_steps_sampled: 1644354
    num_steps_trained: 1644354
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,823,22558.7,1644354,9.0184,14.63,-0.03,104.87


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1646352
  custom_metrics: {}
  date: 2021-11-09_10-01-59
  done: false
  episode_len_mean: 107.05
  episode_media: {}
  episode_reward_max: 14.630000000000013
  episode_reward_mean: 9.09840000000002
  episode_reward_min: 4.110000000000021
  episodes_this_iter: 18
  episodes_total: 16165
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3707582377252125
          entropy_coeff: 0.009999999999999998
          kl: 0.009616539838880412
          policy_loss: -0.03527739965135143
          total_loss: 0.09608380156790926
          vf_explained_var: 0.9774616956710815
          vf_loss: 0.13337149424921899
    num_agent_steps_sampled: 1646352
    num_agent_steps_trained: 1646352
    num_steps_sampled: 1646352
    num_steps_trained: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,824,22581,1646352,9.0984,14.63,4.11,107.05


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1648350
  custom_metrics: {}
  date: 2021-11-09_10-02-23
  done: false
  episode_len_mean: 106.35
  episode_media: {}
  episode_reward_max: 14.550000000000017
  episode_reward_mean: 9.077900000000017
  episode_reward_min: 4.110000000000021
  episodes_this_iter: 18
  episodes_total: 16183
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.368929203919002
          entropy_coeff: 0.009999999999999998
          kl: 0.009615148543530187
          policy_loss: -0.029344881370308853
          total_loss: 0.08621092056412072
          vf_explained_var: 0.9853140115737915
          vf_loss: 0.11754949946133864
    num_agent_steps_sampled: 1648350
    num_agent_steps_trained: 1648350
    num_steps_sampled: 1648350
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,825,22605.3,1648350,9.0779,14.55,4.11,106.35


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1650348
  custom_metrics: {}
  date: 2021-11-09_10-02-47
  done: false
  episode_len_mean: 106.43
  episode_media: {}
  episode_reward_max: 14.550000000000017
  episode_reward_mean: 8.956400000000018
  episode_reward_min: 4.110000000000021
  episodes_this_iter: 20
  episodes_total: 16203
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3772373750096276
          entropy_coeff: 0.009999999999999998
          kl: 0.009125368828100614
          policy_loss: -0.08224509181011291
          total_loss: 0.06826940072434289
          vf_explained_var: 0.9800993204116821
          vf_loss: 0.15318702475300858
    num_agent_steps_sampled: 1650348
    num_agent_steps_trained: 1650348
    num_steps_sampled: 1650348
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,826,22629.5,1650348,8.9564,14.55,4.11,106.43


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1652346
  custom_metrics: {}
  date: 2021-11-09_10-03-11
  done: false
  episode_len_mean: 105.93
  episode_media: {}
  episode_reward_max: 14.550000000000017
  episode_reward_mean: 9.090600000000018
  episode_reward_min: 4.05000000000002
  episodes_this_iter: 18
  episodes_total: 16221
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3724467379706247
          entropy_coeff: 0.009999999999999998
          kl: 0.00824753387473213
          policy_loss: -0.05807019674352237
          total_loss: 0.05171900178704943
          vf_explained_var: 0.9768297672271729
          vf_loss: 0.11348159671539353
    num_agent_steps_sampled: 1652346
    num_agent_steps_trained: 1652346
    num_steps_sampled: 1652346
    num_steps_trained: 165

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,827,22653.2,1652346,9.0906,14.55,4.05,105.93


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1654344
  custom_metrics: {}
  date: 2021-11-09_10-03-36
  done: false
  episode_len_mean: 107.56
  episode_media: {}
  episode_reward_max: 14.680000000000014
  episode_reward_mean: 8.68910000000002
  episode_reward_min: 2.8300000000000196
  episodes_this_iter: 19
  episodes_total: 16240
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3632143769945417
          entropy_coeff: 0.009999999999999998
          kl: 0.009291575183977349
          policy_loss: -0.042278051873048145
          total_loss: 0.0967505992878051
          vf_explained_var: 0.9800856709480286
          vf_loss: 0.14135878394756998
    num_agent_steps_sampled: 1654344
    num_agent_steps_trained: 1654344
    num_steps_sampled: 1654344
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,828,22677.7,1654344,8.6891,14.68,2.83,107.56


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1656342
  custom_metrics: {}
  date: 2021-11-09_10-04-00
  done: false
  episode_len_mean: 107.17
  episode_media: {}
  episode_reward_max: 14.680000000000014
  episode_reward_mean: 8.670600000000018
  episode_reward_min: 2.8300000000000196
  episodes_this_iter: 18
  episodes_total: 16258
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3415612998462858
          entropy_coeff: 0.009999999999999998
          kl: 0.008779053022821464
          policy_loss: -0.02115984729358128
          total_loss: 0.10672806714262281
          vf_explained_var: 0.9727094173431396
          vf_loss: 0.13062493504867667
    num_agent_steps_sampled: 1656342
    num_agent_steps_trained: 1656342
    num_steps_sampled: 1656342
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,829,22702.1,1656342,8.6706,14.68,2.83,107.17


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1658340
  custom_metrics: {}
  date: 2021-11-09_10-04-24
  done: false
  episode_len_mean: 106.93
  episode_media: {}
  episode_reward_max: 14.680000000000014
  episode_reward_mean: 8.758600000000019
  episode_reward_min: 2.8300000000000196
  episodes_this_iter: 20
  episodes_total: 16278
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4006566950253079
          entropy_coeff: 0.009999999999999998
          kl: 0.009877408230199844
          policy_loss: -0.038792249666793004
          total_loss: 0.1013863057785091
          vf_explained_var: 0.9739933609962463
          vf_loss: 0.142170520996054
    num_agent_steps_sampled: 1658340
    num_agent_steps_trained: 1658340
    num_steps_sampled: 1658340
    num_steps_trained: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,830,22726.6,1658340,8.7586,14.68,2.83,106.93


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1660338
  custom_metrics: {}
  date: 2021-11-09_10-04-50
  done: false
  episode_len_mean: 106.66
  episode_media: {}
  episode_reward_max: 14.680000000000014
  episode_reward_mean: 8.769800000000018
  episode_reward_min: 2.8300000000000196
  episodes_this_iter: 18
  episodes_total: 16296
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3746763649440947
          entropy_coeff: 0.009999999999999998
          kl: 0.016263930339060175
          policy_loss: -0.013409013212436722
          total_loss: 0.2059663671822775
          vf_explained_var: 0.9689200520515442
          vf_loss: 0.21333915475933324
    num_agent_steps_sampled: 1660338
    num_agent_steps_trained: 1660338
    num_steps_sampled: 1660338
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,831,22751.6,1660338,8.7698,14.68,2.83,106.66


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1662336
  custom_metrics: {}
  date: 2021-11-09_10-05-14
  done: false
  episode_len_mean: 106.97
  episode_media: {}
  episode_reward_max: 14.680000000000014
  episode_reward_mean: 8.841400000000018
  episode_reward_min: 2.6200000000000205
  episodes_this_iter: 18
  episodes_total: 16314
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.394651528767177
          entropy_coeff: 0.009999999999999998
          kl: 0.009081601746839361
          policy_loss: -0.03400879385215896
          total_loss: 0.11714513704535506
          vf_explained_var: 0.9713095426559448
          vf_loss: 0.15405384195702418
    num_agent_steps_sampled: 1662336
    num_agent_steps_trained: 1662336
    num_steps_sampled: 1662336
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,832,22775.5,1662336,8.8414,14.68,2.62,106.97


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1664334
  custom_metrics: {}
  date: 2021-11-09_10-05-39
  done: false
  episode_len_mean: 107.3
  episode_media: {}
  episode_reward_max: 14.570000000000014
  episode_reward_mean: 8.635600000000016
  episode_reward_min: 2.6200000000000205
  episodes_this_iter: 20
  episodes_total: 16334
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4042484663781665
          entropy_coeff: 0.009999999999999998
          kl: 0.01006137814812107
          policy_loss: -0.011300418384018398
          total_loss: 0.199104731804913
          vf_explained_var: 0.9776636958122253
          vf_loss: 0.2122092560288452
    num_agent_steps_sampled: 1664334
    num_agent_steps_trained: 1664334
    num_steps_sampled: 1664334
    num_steps_trained: 1664

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,833,22800.5,1664334,8.6356,14.57,2.62,107.3


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1666332
  custom_metrics: {}
  date: 2021-11-09_10-06-04
  done: false
  episode_len_mean: 105.82
  episode_media: {}
  episode_reward_max: 14.650000000000013
  episode_reward_mean: 9.240700000000018
  episode_reward_min: 2.6200000000000205
  episodes_this_iter: 18
  episodes_total: 16352
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3501358304704938
          entropy_coeff: 0.009999999999999998
          kl: 0.009299811601631684
          policy_loss: -0.004321863111995516
          total_loss: 0.16800050582914125
          vf_explained_var: 0.9758537411689758
          vf_loss: 0.17451169932527202
    num_agent_steps_sampled: 1666332
    num_agent_steps_trained: 1666332
    num_steps_sampled: 1666332
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,834,22826.2,1666332,9.2407,14.65,2.62,105.82


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1668330
  custom_metrics: {}
  date: 2021-11-09_10-06-30
  done: false
  episode_len_mean: 104.42
  episode_media: {}
  episode_reward_max: 14.650000000000013
  episode_reward_mean: 9.26460000000002
  episode_reward_min: 2.6200000000000205
  episodes_this_iter: 21
  episodes_total: 16373
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.2756472661381677
          entropy_coeff: 0.009999999999999998
          kl: 0.012020708945195998
          policy_loss: -0.01118973804016908
          total_loss: 0.2139585891117652
          vf_explained_var: 0.973414957523346
          vf_loss: 0.223283147936066
    num_agent_steps_sampled: 1668330
    num_agent_steps_trained: 1668330
    num_steps_sampled: 1668330
    num_steps_trained: 16683

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,835,22851.5,1668330,9.2646,14.65,2.62,104.42




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1670328
  custom_metrics: {}
  date: 2021-11-09_10-07-12
  done: false
  episode_len_mean: 102.34
  episode_media: {}
  episode_reward_max: 14.650000000000013
  episode_reward_mean: 9.146200000000016
  episode_reward_min: -0.07
  episodes_this_iter: 20
  episodes_total: 16393
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4098585946219309
          entropy_coeff: 0.009999999999999998
          kl: 0.008660736867762777
          policy_loss: -0.04289105491978781
          total_loss: 0.2467676018142984
          vf_explained_var: 0.9542752504348755
          vf_loss: 0.29322256452980494
    num_agent_steps_sampled: 1670328
    num_agent_steps_trained: 1670328
    num_steps_sampled: 1670328
    num_steps_trained: 1670328
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,836,22894,1670328,9.1462,14.65,-0.07,102.34


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1672326
  custom_metrics: {}
  date: 2021-11-09_10-07-37
  done: false
  episode_len_mean: 102.34
  episode_media: {}
  episode_reward_max: 14.650000000000013
  episode_reward_mean: 9.168100000000017
  episode_reward_min: -0.07
  episodes_this_iter: 20
  episodes_total: 16413
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3441459547905694
          entropy_coeff: 0.009999999999999998
          kl: 0.00955839103554329
          policy_loss: -0.05888264518053759
          total_loss: 0.04861881514745099
          vf_explained_var: 0.9837036728858948
          vf_loss: 0.1093163611633437
    num_agent_steps_sampled: 1672326
    num_agent_steps_trained: 1672326
    num_steps_sampled: 1672326
    num_steps_trained: 1672326
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,837,22919.2,1672326,9.1681,14.65,-0.07,102.34




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1674324
  custom_metrics: {}
  date: 2021-11-09_10-08-16
  done: false
  episode_len_mean: 101.55
  episode_media: {}
  episode_reward_max: 14.650000000000013
  episode_reward_mean: 9.335000000000017
  episode_reward_min: -0.07
  episodes_this_iter: 19
  episodes_total: 16432
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4103782818430946
          entropy_coeff: 0.009999999999999998
          kl: 0.007974149470693014
          policy_loss: -0.04742362820321605
          total_loss: 0.13502751323616222
          vf_explained_var: 0.9604730010032654
          vf_loss: 0.18685539210037816
    num_agent_steps_sampled: 1674324
    num_agent_steps_trained: 1674324
    num_steps_sampled: 1674324
    num_steps_trained: 1674324
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,838,22958.2,1674324,9.335,14.65,-0.07,101.55


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1676322
  custom_metrics: {}
  date: 2021-11-09_10-08-41
  done: false
  episode_len_mean: 101.88
  episode_media: {}
  episode_reward_max: 14.640000000000013
  episode_reward_mean: 8.849000000000016
  episode_reward_min: -0.07
  episodes_this_iter: 19
  episodes_total: 16451
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4456986966587249
          entropy_coeff: 0.009999999999999998
          kl: 0.010911552611077843
          policy_loss: -0.028392475630555834
          total_loss: 0.1090450997863497
          vf_explained_var: 0.977282702922821
          vf_loss: 0.13862205733145985
    num_agent_steps_sampled: 1676322
    num_agent_steps_trained: 1676322
    num_steps_sampled: 1676322
    num_steps_trained: 1676322
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,839,22982.6,1676322,8.849,14.64,-0.07,101.88


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1678320
  custom_metrics: {}
  date: 2021-11-09_10-09-05
  done: false
  episode_len_mean: 102.63
  episode_media: {}
  episode_reward_max: 14.640000000000013
  episode_reward_mean: 8.784000000000017
  episode_reward_min: -0.07
  episodes_this_iter: 18
  episodes_total: 16469
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3788982871032895
          entropy_coeff: 0.009999999999999998
          kl: 0.008223874477498632
          policy_loss: -0.11332817279866764
          total_loss: -0.021942010796850635
          vf_explained_var: 0.9827289581298828
          vf_loss: 0.09517185335003195
    num_agent_steps_sampled: 1678320
    num_agent_steps_trained: 1678320
    num_steps_sampled: 1678320
    num_steps_trained: 1678320
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,840,23006.7,1678320,8.784,14.64,-0.07,102.63


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1680318
  custom_metrics: {}
  date: 2021-11-09_10-09-30
  done: false
  episode_len_mean: 104.48
  episode_media: {}
  episode_reward_max: 14.640000000000013
  episode_reward_mean: 8.98130000000002
  episode_reward_min: -0.07
  episodes_this_iter: 19
  episodes_total: 16488
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4077376456487747
          entropy_coeff: 0.009999999999999998
          kl: 0.008910629809858607
          policy_loss: -0.01629850541551908
          total_loss: 0.07495114328783183
          vf_explained_var: 0.9845319390296936
          vf_loss: 0.09448838697835094
    num_agent_steps_sampled: 1680318
    num_agent_steps_trained: 1680318
    num_steps_sampled: 1680318
    num_steps_trained: 1680318
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,841,23031.5,1680318,8.9813,14.64,-0.07,104.48


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1682316
  custom_metrics: {}
  date: 2021-11-09_10-09-54
  done: false
  episode_len_mean: 105.32
  episode_media: {}
  episode_reward_max: 14.640000000000013
  episode_reward_mean: 9.16090000000002
  episode_reward_min: -0.04
  episodes_this_iter: 19
  episodes_total: 16507
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.472584499063946
          entropy_coeff: 0.009999999999999998
          kl: 0.008903728324110951
          policy_loss: -0.07377894133041125
          total_loss: -0.0028079715036299258
          vf_explained_var: 0.9857209920883179
          vf_loss: 0.07486656946795327
    num_agent_steps_sampled: 1682316
    num_agent_steps_trained: 1682316
    num_steps_sampled: 1682316
    num_steps_trained: 1682316
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,842,23056,1682316,9.1609,14.64,-0.04,105.32


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1684314
  custom_metrics: {}
  date: 2021-11-09_10-10-19
  done: false
  episode_len_mean: 106.55
  episode_media: {}
  episode_reward_max: 14.610000000000015
  episode_reward_mean: 9.07400000000002
  episode_reward_min: 2.750000000000016
  episodes_this_iter: 18
  episodes_total: 16525
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3711709811573938
          entropy_coeff: 0.009999999999999998
          kl: 0.008819318886048238
          policy_loss: -0.049830750375986096
          total_loss: 0.10626755042799882
          vf_explained_var: 0.9794594645500183
          vf_loss: 0.1590824392402456
    num_agent_steps_sampled: 1684314
    num_agent_steps_trained: 1684314
    num_steps_sampled: 1684314
    num_steps_trained: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,843,23080.2,1684314,9.074,14.61,2.75,106.55


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1686312
  custom_metrics: {}
  date: 2021-11-09_10-10-43
  done: false
  episode_len_mean: 106.8
  episode_media: {}
  episode_reward_max: 14.610000000000015
  episode_reward_mean: 9.206400000000018
  episode_reward_min: 3.070000000000011
  episodes_this_iter: 18
  episodes_total: 16543
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4040573761576698
          entropy_coeff: 0.009999999999999998
          kl: 0.006997465678371401
          policy_loss: -0.0648050725105263
          total_loss: -0.004654657148889133
          vf_explained_var: 0.988818883895874
          vf_loss: 0.06567946935870818
    num_agent_steps_sampled: 1686312
    num_agent_steps_trained: 1686312
    num_steps_sampled: 1686312
    num_steps_trained: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,844,23104.9,1686312,9.2064,14.61,3.07,106.8


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1688310
  custom_metrics: {}
  date: 2021-11-09_10-11-07
  done: false
  episode_len_mean: 106.44
  episode_media: {}
  episode_reward_max: 14.610000000000015
  episode_reward_mean: 9.212600000000016
  episode_reward_min: 3.070000000000011
  episodes_this_iter: 20
  episodes_total: 16563
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3439697481337047
          entropy_coeff: 0.009999999999999998
          kl: 0.007581661524566216
          policy_loss: -0.08364584423779022
          total_loss: -0.04055590512497084
          vf_explained_var: 0.9936171770095825
          vf_loss: 0.047307516643334005
    num_agent_steps_sampled: 1688310
    num_agent_steps_trained: 1688310
    num_steps_sampled: 1688310
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,845,23129,1688310,9.2126,14.61,3.07,106.44


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1690308
  custom_metrics: {}
  date: 2021-11-09_10-11-32
  done: false
  episode_len_mean: 105.89
  episode_media: {}
  episode_reward_max: 14.750000000000012
  episode_reward_mean: 9.272100000000018
  episode_reward_min: 4.34000000000002
  episodes_this_iter: 20
  episodes_total: 16583
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.351705545470828
          entropy_coeff: 0.009999999999999998
          kl: 0.010064854557534043
          policy_loss: -0.026600954592937515
          total_loss: 0.0970964405863058
          vf_explained_var: 0.9749687314033508
          vf_loss: 0.12497184480584803
    num_agent_steps_sampled: 1690308
    num_agent_steps_trained: 1690308
    num_steps_sampled: 1690308
    num_steps_trained: 169

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,846,23153.4,1690308,9.2721,14.75,4.34,105.89


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1692306
  custom_metrics: {}
  date: 2021-11-09_10-11-55
  done: false
  episode_len_mean: 105.9
  episode_media: {}
  episode_reward_max: 14.750000000000012
  episode_reward_mean: 8.97620000000002
  episode_reward_min: 2.9900000000000153
  episodes_this_iter: 18
  episodes_total: 16601
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3977924965676807
          entropy_coeff: 0.009999999999999998
          kl: 0.010342318253836712
          policy_loss: -0.0486785406158084
          total_loss: 0.08439511097967625
          vf_explained_var: 0.9745340943336487
          vf_loss: 0.13447147150124822
    num_agent_steps_sampled: 1692306
    num_agent_steps_trained: 1692306
    num_steps_sampled: 1692306
    num_steps_trained: 169

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,847,23176.6,1692306,8.9762,14.75,2.99,105.9


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1694304
  custom_metrics: {}
  date: 2021-11-09_10-12-19
  done: false
  episode_len_mean: 105.73
  episode_media: {}
  episode_reward_max: 14.750000000000012
  episode_reward_mean: 8.805100000000017
  episode_reward_min: 2.840000000000019
  episodes_this_iter: 19
  episodes_total: 16620
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4341133185795376
          entropy_coeff: 0.009999999999999998
          kl: 0.011286200059099406
          policy_loss: -0.06952212587708519
          total_loss: 0.03474110526343187
          vf_explained_var: 0.9792494773864746
          vf_loss: 0.10487614880715097
    num_agent_steps_sampled: 1694304
    num_agent_steps_trained: 1694304
    num_steps_sampled: 1694304
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,848,23200.9,1694304,8.8051,14.75,2.84,105.73


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1696302
  custom_metrics: {}
  date: 2021-11-09_10-12-44
  done: false
  episode_len_mean: 104.93
  episode_media: {}
  episode_reward_max: 14.750000000000012
  episode_reward_mean: 8.719400000000018
  episode_reward_min: 2.840000000000019
  episodes_this_iter: 19
  episodes_total: 16639
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3621345957120259
          entropy_coeff: 0.009999999999999998
          kl: 0.010058779042171697
          policy_loss: -0.07764048175442786
          total_loss: 0.04131183133771022
          vf_explained_var: 0.9766743779182434
          vf_loss: 0.12033844313451222
    num_agent_steps_sampled: 1696302
    num_agent_steps_trained: 1696302
    num_steps_sampled: 1696302
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,849,23225.7,1696302,8.7194,14.75,2.84,104.93


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1698300
  custom_metrics: {}
  date: 2021-11-09_10-13-08
  done: false
  episode_len_mean: 105.93
  episode_media: {}
  episode_reward_max: 14.750000000000012
  episode_reward_mean: 8.780200000000017
  episode_reward_min: 2.840000000000019
  episodes_this_iter: 18
  episodes_total: 16657
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3687753654661632
          entropy_coeff: 0.009999999999999998
          kl: 0.012349856466467045
          policy_loss: -0.042661495435805545
          total_loss: 0.13729925490915776
          vf_explained_var: 0.9705672264099121
          vf_loss: 0.17862648703157902
    num_agent_steps_sampled: 1698300
    num_agent_steps_trained: 1698300
    num_steps_sampled: 1698300
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,850,23249.6,1698300,8.7802,14.75,2.84,105.93


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1700298
  custom_metrics: {}
  date: 2021-11-09_10-13-34
  done: false
  episode_len_mean: 105.08
  episode_media: {}
  episode_reward_max: 14.750000000000012
  episode_reward_mean: 9.149600000000017
  episode_reward_min: 2.840000000000019
  episodes_this_iter: 20
  episodes_total: 16677
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.341354220821744
          entropy_coeff: 0.009999999999999998
          kl: 0.008595986670308961
          policy_loss: -0.023003303933711278
          total_loss: 0.0681826136119309
          vf_explained_var: 0.9881670475006104
          vf_loss: 0.09414354589368616
    num_agent_steps_sampled: 1700298
    num_agent_steps_trained: 1700298
    num_steps_sampled: 1700298
    num_steps_trained: 17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,851,23275.8,1700298,9.1496,14.75,2.84,105.08


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1702296
  custom_metrics: {}
  date: 2021-11-09_10-14-00
  done: false
  episode_len_mean: 105.91
  episode_media: {}
  episode_reward_max: 14.740000000000014
  episode_reward_mean: 9.095500000000017
  episode_reward_min: 1.1700000000000161
  episodes_this_iter: 19
  episodes_total: 16696
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3759015650976272
          entropy_coeff: 0.009999999999999998
          kl: 0.013308438842954955
          policy_loss: -0.02086525417509533
          total_loss: 0.23901858660614206
          vf_explained_var: 0.95055091381073
          vf_loss: 0.25745484933611895
    num_agent_steps_sampled: 1702296
    num_agent_steps_trained: 1702296
    num_steps_sampled: 1702296
    num_steps_trained: 17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,852,23301.6,1702296,9.0955,14.74,1.17,105.91


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1704294
  custom_metrics: {}
  date: 2021-11-09_10-14-26
  done: false
  episode_len_mean: 105.45
  episode_media: {}
  episode_reward_max: 14.740000000000014
  episode_reward_mean: 9.454100000000016
  episode_reward_min: 1.1700000000000161
  episodes_this_iter: 19
  episodes_total: 16715
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.369614182767414
          entropy_coeff: 0.009999999999999998
          kl: 0.00845920213979852
          policy_loss: -0.051003808643491494
          total_loss: 0.059141410106704345
          vf_explained_var: 0.9853562116622925
          vf_loss: 0.11355182497451703
    num_agent_steps_sampled: 1704294
    num_agent_steps_trained: 1704294
    num_steps_sampled: 1704294
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,853,23327.1,1704294,9.4541,14.74,1.17,105.45




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1706292
  custom_metrics: {}
  date: 2021-11-09_10-15-05
  done: false
  episode_len_mean: 104.47
  episode_media: {}
  episode_reward_max: 14.740000000000014
  episode_reward_mean: 9.326600000000019
  episode_reward_min: -0.05
  episodes_this_iter: 19
  episodes_total: 16734
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3613509643645514
          entropy_coeff: 0.009999999999999998
          kl: 0.008103028260022275
          policy_loss: -0.08150580954693612
          total_loss: -0.005307117556887013
          vf_explained_var: 0.985023558139801
          vf_loss: 0.07995590581780389
    num_agent_steps_sampled: 1706292
    num_agent_steps_trained: 1706292
    num_steps_sampled: 1706292
    num_steps_trained: 1706292
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,854,23366.7,1706292,9.3266,14.74,-0.05,104.47




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1708290
  custom_metrics: {}
  date: 2021-11-09_10-15-46
  done: false
  episode_len_mean: 102.8
  episode_media: {}
  episode_reward_max: 14.550000000000015
  episode_reward_mean: 9.318200000000019
  episode_reward_min: -0.05
  episodes_this_iter: 20
  episodes_total: 16754
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3038408234005883
          entropy_coeff: 0.009999999999999998
          kl: 0.01266846847102756
          policy_loss: -0.021927619335197268
          total_loss: 0.23095140778681353
          vf_explained_var: 0.9568880796432495
          vf_loss: 0.2505078680281128
    num_agent_steps_sampled: 1708290
    num_agent_steps_trained: 1708290
    num_steps_sampled: 1708290
    num_steps_trained: 1708290
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,855,23407.4,1708290,9.3182,14.55,-0.05,102.8




Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1710288
  custom_metrics: {}
  date: 2021-11-09_10-16-28
  done: false
  episode_len_mean: 102.52
  episode_media: {}
  episode_reward_max: 14.550000000000015
  episode_reward_mean: 8.917500000000018
  episode_reward_min: -0.05
  episodes_this_iter: 21
  episodes_total: 16775
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3366328693571545
          entropy_coeff: 0.009999999999999998
          kl: 0.009387890097870812
          policy_loss: -0.023993409136753706
          total_loss: 0.08568253125108424
          vf_explained_var: 0.9747622609138489
          vf_loss: 0.11162310423595564
    num_agent_steps_sampled: 1710288
    num_agent_steps_trained: 1710288
    num_steps_sampled: 1710288
    num_steps_trained: 1710288
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,856,23449.1,1710288,8.9175,14.55,-0.05,102.52


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1712286
  custom_metrics: {}
  date: 2021-11-09_10-16-53
  done: false
  episode_len_mean: 102.81
  episode_media: {}
  episode_reward_max: 14.550000000000015
  episode_reward_mean: 9.055900000000017
  episode_reward_min: -0.05
  episodes_this_iter: 18
  episodes_total: 16793
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3667687370663597
          entropy_coeff: 0.009999999999999998
          kl: 0.008267318610633706
          policy_loss: -0.010102544352412224
          total_loss: 0.09599313003321489
          vf_explained_var: 0.9834508299827576
          vf_loss: 0.10970722817416702
    num_agent_steps_sampled: 1712286
    num_agent_steps_trained: 1712286
    num_steps_sampled: 1712286
    num_steps_trained: 1712286
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,857,23474.2,1712286,9.0559,14.55,-0.05,102.81


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1714284
  custom_metrics: {}
  date: 2021-11-09_10-17-17
  done: false
  episode_len_mean: 102.87
  episode_media: {}
  episode_reward_max: 14.590000000000016
  episode_reward_mean: 8.865800000000018
  episode_reward_min: -0.05
  episodes_this_iter: 18
  episodes_total: 16811
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.4526844490142095
          entropy_coeff: 0.009999999999999998
          kl: 0.00917424487004774
          policy_loss: -0.04334788194724492
          total_loss: 0.10693526936783677
          vf_explained_var: 0.9755388498306274
          vf_loss: 0.1536507011169479
    num_agent_steps_sampled: 1714284
    num_agent_steps_trained: 1714284
    num_steps_sampled: 1714284
    num_steps_trained: 1714284
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,858,23497.8,1714284,8.8658,14.59,-0.05,102.87


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1716282
  custom_metrics: {}
  date: 2021-11-09_10-17-41
  done: false
  episode_len_mean: 102.91
  episode_media: {}
  episode_reward_max: 14.590000000000016
  episode_reward_mean: 8.978300000000017
  episode_reward_min: -0.05
  episodes_this_iter: 20
  episodes_total: 16831
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.3277374909037636
          entropy_coeff: 0.009999999999999998
          kl: 0.010084683919337051
          policy_loss: -0.05189268554427794
          total_loss: 0.08386854852239291
          vf_explained_var: 0.9799172282218933
          vf_loss: 0.13677188420579547
    num_agent_steps_sampled: 1716282
    num_agent_steps_trained: 1716282
    num_steps_sampled: 1716282
    num_steps_trained: 1716282
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,859,23522.2,1716282,8.9783,14.59,-0.05,102.91


Result for PPO_my_env_693ef_00000:
  agent_timesteps_total: 1718280
  custom_metrics: {}
  date: 2021-11-09_10-18-06
  done: false
  episode_len_mean: 104.61
  episode_media: {}
  episode_reward_max: 14.590000000000016
  episode_reward_mean: 9.098800000000018
  episode_reward_min: -0.02
  episodes_this_iter: 19
  episodes_total: 16850
  experiment_id: 919abc63de9242ecb5d48569c8e42c93
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2163719177246097
          cur_lr: 5.000000000000001e-05
          entropy: 1.405408165000734
          entropy_coeff: 0.009999999999999998
          kl: 0.010105208621496161
          policy_loss: -0.07035905475772562
          total_loss: 0.038200175744437036
          vf_explained_var: 0.9817296862602234
          vf_loss: 0.11032161866092965
    num_agent_steps_sampled: 1718280
    num_agent_steps_trained: 1718280
    num_steps_sampled: 1718280
    num_steps_trained: 1718280
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_693ef_00000,RUNNING,192.168.3.5:170,860,23547.2,1718280,9.0988,14.59,-0.02,104.61


Process _WandbLoggingProcess-1:
Traceback (most recent call last):
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/tune/integration/wandb.py", line 200, in run
    result = self.queue.get()
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/queues.py", line 94, in get
    res = self._recv_bytes()
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/root/miniconda/en

Traceback (most recent call last):
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ipython-7.25.0-py3.7.egg/IPython/core/interactiveshell.py", line 3441, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_56/438550406.py", line 33, in <module>
    checkpoint_at_end=True)
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/tune/tune.py", line 532, in run
    runner.step()
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/tune/trial_runner.py", line 554, in step
    self._process_events(timeout=timeout)  # blocking
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/tune/trial_runner.py", line 675, in _process_events
    timeout=timeout)  # blocking
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/tune/ray_trial_executor.py", line 718, in get_next_available_trial
    ready, _ = ray.wait(shuffled_results, timeout=timeout)
  File "/root/miniconda/envs/py37/lib/python3.7/site-pac

TypeError: object of type 'NoneType' has no len()