In [1]:
import torch 
from torch import nn

import ray
from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

#from models import VisualEncoder
from train import *
from wrappers_2 import *



In [2]:
class VisualEncoder(nn.Module):
    """Convolutional image encoder that maps an RGB frame to a flat feature vector.

    The network is six ``Conv2d`` layers (kernel 2, stride 2, no padding), each
    followed by ``ELU``, and a final ``Flatten``. Every convolution halves the
    spatial resolution, so a 64x64 input is reduced to 1x1 and the flattened
    output has 512 features per sample.
    """

    def __init__(self):
        super().__init__()

        # Channel count entering the stack, then the output width of each conv.
        channel_plan = (3, 32, 32, 64, 128, 256, 512)

        stages = []
        for in_channels, out_channels in zip(channel_plan, channel_plan[1:]):
            stages.append(
                nn.Conv2d(in_channels, out_channels, kernel_size=2, stride=2, padding=0)
            )
            stages.append(nn.ELU())
        stages.append(nn.Flatten())

        # Module order (and therefore the ``cnn.<index>`` state-dict keys) is
        # conv, ELU, conv, ELU, ..., Flatten.
        self.cnn = nn.Sequential(*stages)

    def forward(self, x):
        """Encode ``x`` (channels-first image batch) into flattened features."""
        return self.cnn(x)

In [3]:
from torch.nn.functional import one_hot

class MyModelClass(TorchModelV2, nn.Module):
    """RLlib Torch model combining a pretrained image encoder with a target-grid encoder.

    The first-person view (``obs['pov']``) is encoded by ``VisualEncoder`` with
    weights loaded from disk; the encoder runs under ``torch.no_grad()`` so no
    gradient flows into it from this model's losses. The target grid
    (``obs['target_grid']``) is one-hot encoded into 7 classes and reduced to a
    single channel by a 1x1x1 ``Conv3d``. Both feature vectors are concatenated
    and passed through an MLP trunk that feeds a policy head (``action_space.n``
    logits) and a scalar value head.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        """Build the encoders, the MLP trunk and the two heads.

        Args:
            obs_space, action_space, num_outputs, model_config, name: forwarded
                unchanged to ``TorchModelV2.__init__``. ``action_space.n`` also
                sets the width of the action head.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)

        # Output width of VisualEncoder for a 64x64 frame (512 channels at 1x1).
        visual_features_dim = 512
        # One channel over the 9 x 11 x 11 target grid, flattened.
        target_features_dim = 9 * 11 * 11

        self.visual_encoder = VisualEncoder()
        # Load on CPU first; the module is moved to the GPU below when available.
        self.visual_encoder.load_state_dict(
            torch.load("/IGLU-Minecraft/models/AngelaCNN/encoder_weigths.pth", map_location=torch.device('cpu'))
        )
        # 1x1x1 convolution: mixes the 7 one-hot class channels into one value per cell.
        self.target_encoder = nn.Sequential(
            nn.Conv3d(7, 1, kernel_size=1, stride=1, padding=0),
            nn.ELU(),
        )
        policy_hidden_dim = 256
        self.policy_network = nn.Sequential(
            nn.Linear(visual_features_dim + target_features_dim, 1024),
            nn.ELU(),
            nn.Linear(1024, 1024),
            nn.ELU(),
            nn.Linear(1024, 1024),
            nn.ELU(),
            nn.Linear(1024, policy_hidden_dim),
            nn.ELU(),
        )
        self.action_head = nn.Linear(policy_hidden_dim, action_space.n)
        self.value_head = nn.Linear(policy_hidden_dim, 1)
        # Value estimate cached by forward() and returned by value_function().
        self.last_value = None

        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            for module in (
                self.visual_encoder,
                self.target_encoder,
                self.policy_network,
                self.action_head,
                self.value_head,
            ):
                module.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Compute action logits and cache the value estimate.

        Args:
            input_dict: batch dict; ``input_dict['obs']`` must provide ``'pov'``
                (channels-last image batch, scaled here by 1/255) and
                ``'target_grid'`` (class indices in ``[0, 7)``).
            state: recurrent state, returned unchanged.
            seq_lens: unused.

        Returns:
            ``(logits, state)`` where ``logits`` has ``action_space.n`` columns.
        """
        obs = input_dict['obs']
        pov = obs['pov'].permute(0, 3, 1, 2).float() / 255.0
        target = one_hot(obs['target_grid'].long(), num_classes=7).permute(0, 4, 1, 2, 3).float()

        # Tensor.cuda()/Tensor.to() return a new tensor and never move the
        # receiver in place, so the result must be assigned. Use the device the
        # parameters actually live on rather than assuming "cuda".
        device = next(self.parameters()).device
        pov = pov.to(device)
        target = target.to(device)

        # The pretrained encoder is used as a fixed feature extractor here.
        with torch.no_grad():
            visual_features = self.visual_encoder(pov)

        target_features = self.target_encoder(target)
        target_features = target_features.reshape(target_features.shape[0], -1)
        features = torch.cat([visual_features, target_features], dim=1)
        features = self.policy_network(features)
        action = self.action_head(features)
        # (batch, 1) -> (batch,)
        self.last_value = self.value_head(features).squeeze(1)
        return action, state

    @override(TorchModelV2)
    def value_function(self):
        """Return the value estimate cached by the most recent ``forward()`` call.

        Raises:
            AssertionError: if ``forward()`` has not been called yet.
        """
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

In [4]:
# Scratch cell: count the parameters of a candidate policy trunk.
# Note this trunk (1024 -> 512 -> 256 -> 256) is narrower than the one built
# inside MyModelClass (1024 -> 1024 -> 1024 -> 256).
visual_features_dim = 512
target_features_dim = 9 * 11 * 11
policy_hidden_dim = 256

# Width of every activation, input first; consecutive pairs define one Linear.
layer_widths = [
    visual_features_dim + target_features_dim,
    1024,
    512,
    policy_hidden_dim,
    policy_hidden_dim,
]

policy_layers = []
for in_width, out_width in zip(layer_widths[:-1], layer_widths[1:]):
    policy_layers.append(nn.Linear(in_width, out_width))
    policy_layers.append(nn.ELU())

policy_network = nn.Sequential(*policy_layers)

sum(param.numel() for param in policy_network.parameters())

2362368

In [5]:
# Register MyModelClass with RLlib's ModelCatalog under the name "my_torch_model";
# the trainer config below selects it through "custom_model": "my_torch_model".
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [6]:
class VisualObservationWrapper(ObsWrapper):
    """Observation wrapper exposing the POV image, inventory, compass and (optionally) target grid.

    The declared ``observation_space`` is a ``gym.spaces.Dict`` with ``'pov'``,
    ``'inventory'`` and ``'compass'``; ``'target_grid'`` is added only when
    ``include_target`` is true. ``observation()`` returns exactly the keys that
    the declared space contains.
    """

    def __init__(self, env, include_target=False):
        """
        Args:
            env: environment to wrap.
            include_target: when true, the target grid is part of both the
                observation space and every returned observation.
        """
        super().__init__(env)
        # Remembered so observation() can stay consistent with the declared space.
        self.include_target = include_target
        self.observation_space = {
            'pov': gym.spaces.Box(low=0, high=255, shape=(64, 64, 3)),
            'inventory': gym.spaces.Box(low=0.0, high=20.0, shape=(6,)),
            'compass': gym.spaces.Box(low=-180.0, high=180.0, shape=(1,))
        }
        if include_target:
            self.observation_space['target_grid'] = \
                gym.spaces.Box(low=0, high=6, shape=(9, 11, 11))
        self.observation_space = gym.spaces.Dict(self.observation_space)

    def observation(self, obs, reward=None, done=None, info=None):
        """Convert a raw observation into the dict described by ``observation_space``.

        Args:
            obs: raw observation; must provide ``'pov'``, ``'inventory'`` and
                ``obs['compass']['angle']``.
            reward, done: accepted for interface compatibility; unused here.
            info: optional step info dict. If it carries ``'target_grid'`` the
                entry is removed from ``info`` (side effect on the caller's dict).

        Returns:
            Dict with ``'pov'``, ``'inventory'``, ``'compass'`` and, when
            ``include_target`` is true, ``'target_grid'``.
        """
        if info is not None:
            if 'target_grid' in info:
                # Take the grid out of info so it is not carried along twice.
                target_grid = info.pop('target_grid')
            else:
                # Unexpected: a step info without the grid. Log it, ask the
                # underlying env to reset if it supports that, and fall back
                # to the current task's grid.
                logger.error(f'info: {info}')
                if hasattr(self.unwrapped, 'should_reset'):
                    self.unwrapped.should_reset(True)
                target_grid = self.env.unwrapped.tasks.current.target_grid
        else:
            # No info available (NOTE(review): presumably the reset() path — confirm).
            target_grid = self.env.unwrapped.tasks.current.target_grid

        observation = {
            'pov': obs['pov'].astype(np.float32),
            'inventory': obs['inventory'],
            # Box spaces default to float32; build the compass array with the
            # same dtype instead of NumPy's float64 default.
            'compass': np.array([obs['compass']['angle'].item()], dtype=np.float32),
        }
        # Only emit the grid when the declared observation space contains it;
        # otherwise the returned dict would not be a member of the space.
        if self.include_target:
            observation['target_grid'] = target_grid
        return observation

In [7]:
import os
# Set CUDA environment variables for this process before training starts.
# NOTE(review): presumably this orders devices by PCI bus id and exposes only
# device "0" to CUDA — confirm against the CUDA environment-variable docs.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0"

# Task ids "C1" .. "C155", leaving out "C38".
tasks = [f'C{task_number}' for task_number in range(1, 156) if task_number != 38]
    
class RewardWrapper(gym.RewardWrapper):
    """Reward shaping: penalize idle steps and shrink unit-magnitude rewards.

    Mapping applied to each reward:
      * exactly 0      -> -0.01
      * exactly +1/-1  -> +0.1/-0.1 (divided by 10)
      * anything else  -> unchanged
    """

    def __init__(self, env):
        super().__init__(env)

    def reward(self, rew):
        """Return the shaped reward for the raw reward ``rew``."""
        if rew == 0:
            return -0.01
        if abs(rew) == 1:
            return rew / 10
        return rew
    
def env_creator(env_config):
    """Build the wrapped IGLU environment used for training.

    Creates ``IGLUSilentBuilder-v0`` with a 250-step limit, restricts it to the
    module-level ``tasks`` preset, then applies the wrappers in this order:
    ``VisualObservationWrapper`` (with target grid) -> ``SelectAndPlace`` ->
    ``Discretization`` (flat 'human-level' action space) -> ``RewardWrapper``.

    Args:
        env_config: passed in by the registry; not used.

    Returns:
        The fully wrapped environment.
    """
    base_env = gym.make('IGLUSilentBuilder-v0', max_steps=250)
    base_env.update_taskset(TaskSet(preset=tasks))

    observed_env = VisualObservationWrapper(base_env, include_target=True)
    placing_env = SelectAndPlace(observed_env)
    discrete_env = Discretization(placing_env, flat_action_space('human-level'))
    return RewardWrapper(discrete_env)

from ray.tune.registry import register_env
# Register the environment factory under the id "my_env"; the trainer config
# below refers to it through "env": "my_env".
register_env("my_env", env_creator)

from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

In [None]:
from ray.tune.integration.wandb import WandbLogger

# Launch PPO training through Ray Tune, logging to Weights & Biases and
# checkpointing every 5 iterations (plus once at the end).
analysis = tune.run(
    PPOTrainer,
    config={
        "env": "my_env",
        "framework": "torch",
        "num_gpus": 1,
        "num_workers": 3,
        "sgd_minibatch_size": 256,
        "clip_param": 0.2,
        "entropy_coeff": 0.01,
        "lambda": 0.95,
        "train_batch_size": 5_000,
        "lr": 1e-4,
        #"gamma": 0.99,
        "model": {
            # Specify our custom model from above.
            "custom_model": "my_torch_model",
            # Extra kwargs to be passed to your model's c'tor.
            "custom_model_config": {},
        },
        "logger_config": {
            "wandb": {
                "project": "IGLU-Minecraft",
                # Kept on one physical line: a double-quoted literal cannot
                # contain raw line breaks.
                "name": "PPO All Tasks 2 pretrained (AngelaCNN) (3 noops after placement) r: -0.01 div10",
            }
        },
    },
    loggers=[WandbLogger],
    local_dir="/IGLU-Minecraft/checkpoints/all_tasks",
    keep_checkpoints_num=50,
    checkpoint_freq=5,
    checkpoint_at_end=True,
)

2021-11-07 14:13:17,442	INFO wandb.py:170 -- Already logged into W&B.
2021-11-07 14:13:17,458	ERROR syncer.py:72 -- Log sync requires rsync to be installed.


Trial name,status,loc
PPO_my_env_da758_00000,RUNNING,


[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.6 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2m[36m(pid=558908)[0m 2021-11-07 14:13:20,885	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=558908)[0m 2021-11-07 14:13:20,885	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=558908)[0m 2021-11-07 14:13:28,938	INFO trainable.py:109 -- Trainable.setup took 10.511 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 9996
  custom_metrics: {}
  date: 2021-11-07_14-17-03
  done: false
  episode_len_mean: 99.54545454545455
  episode_media: {}
  episode_reward_max: 2.860000000000003
  episode_reward_mean: -0.8212121212121217
  episode_reward_min: -1.6100000000000012
  episodes_this_iter: 99
  episodes_total: 99
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999998
          cur_lr: 0.00010000000000000002
          entropy: 2.8820862466453487
          entropy_coeff: 0.01
          kl: 0.007245431269884351
          policy_loss: -0.016355366078324808
          total_loss: 0.007637977097024265
          vf_explained_var: -0.17715410888195038
          vf_loss: 0.051365118665206766
    num_agent_steps_sampled: 9996
    num_agent_steps_trained: 9996
    num_steps_sampled: 9996
    num_steps_trained: 9996
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,1,214.072,9996,-0.821212,2.86,-1.61,99.5455


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 19992
  custom_metrics: {}
  date: 2021-11-07_14-19-00
  done: false
  episode_len_mean: 99.0990099009901
  episode_media: {}
  episode_reward_max: 2.610000000000003
  episode_reward_mean: -0.7611881188118819
  episode_reward_min: -1.6100000000000012
  episodes_this_iter: 101
  episodes_total: 200
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999998
          cur_lr: 0.00010000000000000002
          entropy: 2.871961500705817
          entropy_coeff: 0.01
          kl: 0.00986026421275549
          policy_loss: -0.02017569362034655
          total_loss: 0.0231408148812942
          vf_explained_var: -0.02427317015826702
          vf_loss: 0.07006406934661233
    num_agent_steps_sampled: 19992
    num_agent_steps_trained: 19992
    num_steps_sampled: 19992
    num_steps_trained: 19992
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,2,331.533,19992,-0.761188,2.61,-1.61,99.099


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 29988
  custom_metrics: {}
  date: 2021-11-07_14-20-54
  done: false
  episode_len_mean: 101.47
  episode_media: {}
  episode_reward_max: 2.439999999999999
  episode_reward_mean: -0.7144000000000007
  episode_reward_min: -1.5900000000000007
  episodes_this_iter: 98
  episodes_total: 298
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999998
          cur_lr: 0.00010000000000000002
          entropy: 2.8546294130830683
          entropy_coeff: 0.01
          kl: 0.013411636420832186
          policy_loss: -0.02397455284610773
          total_loss: 0.007206196466890665
          vf_explained_var: 0.27785927057266235
          vf_loss: 0.05704471586097
    num_agent_steps_sampled: 29988
    num_agent_steps_trained: 29988
    num_steps_sampled: 29988
    num_steps_trained: 29988
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,3,445.073,29988,-0.7144,2.44,-1.59,101.47




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 39984
  custom_metrics: {}
  date: 2021-11-07_14-23-15
  done: false
  episode_len_mean: 99.11881188118812
  episode_media: {}
  episode_reward_max: 4.92000000000001
  episode_reward_mean: -0.20445544554455455
  episode_reward_min: -1.7800000000000007
  episodes_this_iter: 101
  episodes_total: 399
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999998
          cur_lr: 0.00010000000000000002
          entropy: 2.830136453595936
          entropy_coeff: 0.01
          kl: 0.017423643066140772
          policy_loss: -0.030083215927593727
          total_loss: 0.11269993615360596
          vf_explained_var: 0.2986709177494049
          vf_loss: 0.16759978749462937
    num_agent_steps_sampled: 39984
    num_agent_steps_trained: 39984
    num_steps_sampled: 39984
    num_steps_trained: 39984
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,4,586.297,39984,-0.204455,4.92,-1.78,99.1188


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 49980
  custom_metrics: {}
  date: 2021-11-07_14-25-20
  done: false
  episode_len_mean: 101.69
  episode_media: {}
  episode_reward_max: 6.530000000000007
  episode_reward_mean: 0.3536000000000008
  episode_reward_min: -2.129999999999999
  episodes_this_iter: 99
  episodes_total: 498
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999998
          cur_lr: 0.00010000000000000002
          entropy: 2.786437828724201
          entropy_coeff: 0.01
          kl: 0.021151041119131263
          policy_loss: -0.02813620950278436
          total_loss: 0.281179541330307
          vf_explained_var: 0.2791663408279419
          vf_loss: 0.3329499189686189
    num_agent_steps_sampled: 49980
    num_agent_steps_trained: 49980
    num_steps_sampled: 49980
    num_steps_trained: 49980
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,5,711.635,49980,0.3536,6.53,-2.13,101.69


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 59976
  custom_metrics: {}
  date: 2021-11-07_14-27-32
  done: false
  episode_len_mean: 99.93
  episode_media: {}
  episode_reward_max: 4.5700000000000145
  episode_reward_mean: 0.5062000000000011
  episode_reward_min: -1.970000000000001
  episodes_this_iter: 99
  episodes_total: 597
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 0.00010000000000000002
          entropy: 2.7670964473333113
          entropy_coeff: 0.01
          kl: 0.019107513359859694
          policy_loss: -0.03416342549344413
          total_loss: 0.26162091678279076
          vf_explained_var: 0.3989444673061371
          vf_loss: 0.3177230527640408
    num_agent_steps_sampled: 59976
    num_agent_steps_trained: 59976
    num_steps_sampled: 59976
    num_steps_trained: 59976
  iterations_since_restore: 6
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,6,843.658,59976,0.5062,4.57,-1.97,99.93


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 69972
  custom_metrics: {}
  date: 2021-11-07_14-29-43
  done: false
  episode_len_mean: 99.3529411764706
  episode_media: {}
  episode_reward_max: 5.080000000000013
  episode_reward_mean: 0.7449019607843149
  episode_reward_min: -1.9800000000000009
  episodes_this_iter: 102
  episodes_total: 699
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 0.00010000000000000002
          entropy: 2.7409817080212453
          entropy_coeff: 0.01
          kl: 0.02057950165455991
          policy_loss: -0.03479871365568067
          total_loss: 0.22171845538621274
          vf_explained_var: 0.534519612789154
          vf_loss: 0.27775313418645126
    num_agent_steps_sampled: 69972
    num_agent_steps_trained: 69972
    num_steps_sampled: 69972
    num_steps_trained: 69972
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,7,973.891,69972,0.744902,5.08,-1.98,99.3529




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 79968
  custom_metrics: {}
  date: 2021-11-07_14-32-10
  done: false
  episode_len_mean: 95.62135922330097
  episode_media: {}
  episode_reward_max: 6.760000000000013
  episode_reward_mean: 0.8657281553398075
  episode_reward_min: -1.790000000000001
  episodes_this_iter: 103
  episodes_total: 802
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 0.00010000000000000002
          entropy: 2.734050679614401
          entropy_coeff: 0.01
          kl: 0.019611230072854584
          policy_loss: -0.035234777718527704
          total_loss: 0.29533397680801204
          vf_explained_var: 0.4814901649951935
          vf_loss: 0.34908420741558077
    num_agent_steps_sampled: 79968
    num_agent_steps_trained: 79968
    num_steps_sampled: 79968
    num_steps_trained: 79968
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,8,1121.39,79968,0.865728,6.76,-1.79,95.6214


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 89964
  custom_metrics: {}
  date: 2021-11-07_14-34-20
  done: false
  episode_len_mean: 97.6504854368932
  episode_media: {}
  episode_reward_max: 6.88000000000001
  episode_reward_mean: 0.9886407766990313
  episode_reward_min: -1.870000000000001
  episodes_this_iter: 103
  episodes_total: 905
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 0.00010000000000000002
          entropy: 2.717897022483695
          entropy_coeff: 0.01
          kl: 0.01972981722506659
          policy_loss: -0.038824943502425636
          total_loss: 0.27620428455675133
          vf_explained_var: 0.5166344046592712
          vf_loss: 0.33332978025970295
    num_agent_steps_sampled: 89964
    num_agent_steps_trained: 89964
    num_steps_sampled: 89964
    num_steps_trained: 89964
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,9,1250.82,89964,0.988641,6.88,-1.87,97.6505


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 99960
  custom_metrics: {}
  date: 2021-11-07_14-36-34
  done: false
  episode_len_mean: 94.8952380952381
  episode_media: {}
  episode_reward_max: 6.750000000000011
  episode_reward_mean: 1.1550476190476213
  episode_reward_min: -1.7100000000000009
  episodes_this_iter: 105
  episodes_total: 1010
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 0.00010000000000000002
          entropy: 2.706044305287875
          entropy_coeff: 0.01
          kl: 0.02049848573933016
          policy_loss: -0.038340762248819965
          total_loss: 0.26899709751925027
          vf_explained_var: 0.5775786638259888
          vf_loss: 0.3251739848882724
    num_agent_steps_sampled: 99960
    num_agent_steps_trained: 99960
    num_steps_sampled: 99960
    num_steps_trained: 99960
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,10,1385.16,99960,1.15505,6.75,-1.71,94.8952




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 109956
  custom_metrics: {}
  date: 2021-11-07_14-39-13
  done: false
  episode_len_mean: 90.75454545454545
  episode_media: {}
  episode_reward_max: 6.330000000000011
  episode_reward_mean: 0.8753636363636383
  episode_reward_min: -2.2499999999999987
  episodes_this_iter: 110
  episodes_total: 1120
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999999
          cur_lr: 0.00010000000000000002
          entropy: 2.7002450193095413
          entropy_coeff: 0.01
          kl: 0.022519630569657986
          policy_loss: -0.030922873155810895
          total_loss: 0.34228461658279613
          vf_explained_var: 0.5162562131881714
          vf_loss: 0.38500918899463793
    num_agent_steps_sampled: 109956
    num_agent_steps_trained: 109956
    num_steps_sampled: 109956
    num_steps_trained: 109956

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,11,1543.99,109956,0.875364,6.33,-2.25,90.7545


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 119952
  custom_metrics: {}
  date: 2021-11-07_14-41-36
  done: false
  episode_len_mean: 93.58333333333333
  episode_media: {}
  episode_reward_max: 6.890000000000011
  episode_reward_mean: 1.3081481481481514
  episode_reward_min: -1.7600000000000007
  episodes_this_iter: 108
  episodes_total: 1228
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.684723307332422
          entropy_coeff: 0.01
          kl: 0.01826688786241325
          policy_loss: -0.040454565876155583
          total_loss: 0.29623451474576423
          vf_explained_var: 0.6440403461456299
          vf_loss: 0.3450410887726352
    num_agent_steps_sampled: 119952
    num_agent_steps_trained: 119952
    num_steps_sampled: 119952
    num_steps_trained: 119952
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,12,1686.71,119952,1.30815,6.89,-1.76,93.5833


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 129948
  custom_metrics: {}
  date: 2021-11-07_14-43-58
  done: false
  episode_len_mean: 93.79439252336448
  episode_media: {}
  episode_reward_max: 6.930000000000012
  episode_reward_mean: 0.8791588785046748
  episode_reward_min: -1.7300000000000009
  episodes_this_iter: 107
  episodes_total: 1335
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.6899115258811888
          entropy_coeff: 0.01
          kl: 0.018723338247744675
          policy_loss: -0.0397244668016449
          total_loss: 0.2832878552815025
          vf_explained_var: 0.5968801379203796
          vf_loss: 0.33095405597526295
    num_agent_steps_sampled: 129948
    num_agent_steps_trained: 129948
    num_steps_sampled: 129948
    num_steps_trained: 129948
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,13,1828.97,129948,0.879159,6.93,-1.73,93.7944




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 139944
  custom_metrics: {}
  date: 2021-11-07_14-46-40
  done: false
  episode_len_mean: 92.01851851851852
  episode_media: {}
  episode_reward_max: 6.760000000000013
  episode_reward_mean: 1.2630555555555585
  episode_reward_min: -1.9800000000000009
  episodes_this_iter: 108
  episodes_total: 1443
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.6751801329800204
          entropy_coeff: 0.01
          kl: 0.01840517653679139
          policy_loss: -0.04302873565759669
          total_loss: 0.2892321335533873
          vf_explained_var: 0.5749748945236206
          vf_loss: 0.3403774273892244
    num_agent_steps_sampled: 139944
    num_agent_steps_trained: 139944
    num_steps_sampled: 139944
    num_steps_trained: 139944
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,14,1991.26,139944,1.26306,6.76,-1.98,92.0185


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 149940
  custom_metrics: {}
  date: 2021-11-07_14-48-57
  done: false
  episode_len_mean: 90.70642201834862
  episode_media: {}
  episode_reward_max: 8.770000000000012
  episode_reward_mean: 0.9337614678899103
  episode_reward_min: -2.139999999999998
  episodes_this_iter: 109
  episodes_total: 1552
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.658400615871462
          entropy_coeff: 0.01
          kl: 0.018323252420940628
          policy_loss: -0.04517949232672397
          total_loss: 0.2716158012644603
          vf_explained_var: 0.6060242056846619
          vf_loss: 0.32482700494364797
    num_agent_steps_sampled: 149940
    num_agent_steps_trained: 149940
    num_steps_sampled: 149940
    num_steps_trained: 149940
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,15,2128.31,149940,0.933761,8.77,-2.14,90.7064


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 159936
  custom_metrics: {}
  date: 2021-11-07_14-51-13
  done: false
  episode_len_mean: 92.46788990825688
  episode_media: {}
  episode_reward_max: 6.610000000000014
  episode_reward_mean: 1.3847706422018382
  episode_reward_min: -2.0300000000000002
  episodes_this_iter: 109
  episodes_total: 1661
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.6285750951522435
          entropy_coeff: 0.01
          kl: 0.01967987895916084
          policy_loss: -0.04613908043879474
          total_loss: 0.25112690013061223
          vf_explained_var: 0.6467298269271851
          vf_loss: 0.3036258530858745
    num_agent_steps_sampled: 159936
    num_agent_steps_trained: 159936
    num_steps_sampled: 159936
    num_steps_trained: 159936
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,16,2263.98,159936,1.38477,6.61,-2.03,92.4679


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 169932
  custom_metrics: {}
  date: 2021-11-07_14-53-27
  done: false
  episode_len_mean: 93.5233644859813
  episode_media: {}
  episode_reward_max: 6.520000000000014
  episode_reward_mean: 1.5498130841121525
  episode_reward_min: -1.7500000000000007
  episodes_this_iter: 107
  episodes_total: 1768
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.638743826874301
          entropy_coeff: 0.01
          kl: 0.019775111787829452
          policy_loss: -0.050694598601414606
          total_loss: 0.20706236890445534
          vf_explained_var: 0.6662006974220276
          vf_loss: 0.2641221027725782
    num_agent_steps_sampled: 169932
    num_agent_steps_trained: 169932
    num_steps_sampled: 169932
    num_steps_trained: 169932
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,17,2398.43,169932,1.54981,6.52,-1.75,93.5234




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 179928
  custom_metrics: {}
  date: 2021-11-07_14-55-59
  done: false
  episode_len_mean: 92.32407407407408
  episode_media: {}
  episode_reward_max: 12.850000000000016
  episode_reward_mean: 1.4772222222222253
  episode_reward_min: -2.1099999999999994
  episodes_this_iter: 108
  episodes_total: 1876
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.638011506683806
          entropy_coeff: 0.01
          kl: 0.02283782265869246
          policy_loss: -0.049128306470811366
          total_loss: 0.24516482626955605
          vf_explained_var: 0.6130090355873108
          vf_loss: 0.2975499516209731
    num_agent_steps_sampled: 179928
    num_agent_steps_trained: 179928
    num_steps_sampled: 179928
    num_steps_trained: 179928
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,18,2549.65,179928,1.47722,12.85,-2.11,92.3241


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 189924
  custom_metrics: {}
  date: 2021-11-07_14-58-13
  done: false
  episode_len_mean: 92.43518518518519
  episode_media: {}
  episode_reward_max: 8.580000000000014
  episode_reward_mean: 1.640555555555559
  episode_reward_min: -1.5900000000000003
  episodes_this_iter: 108
  episodes_total: 1984
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 2.6059755575962558
          entropy_coeff: 0.01
          kl: 0.02030402589392296
          policy_loss: -0.048229821126621504
          total_loss: 0.31303357128531506
          vf_explained_var: 0.6252217888832092
          vf_loss: 0.3564864088072736
    num_agent_steps_sampled: 189924
    num_agent_steps_trained: 189924
    num_steps_sampled: 189924
    num_steps_trained: 189924
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,19,2683.7,189924,1.64056,8.58,-1.59,92.4352


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 199920
  custom_metrics: {}
  date: 2021-11-07_15-00-28
  done: false
  episode_len_mean: 93.50467289719626
  episode_media: {}
  episode_reward_max: 8.540000000000013
  episode_reward_mean: 1.4850467289719669
  episode_reward_min: -1.8700000000000006
  episodes_this_iter: 107
  episodes_total: 2091
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.6134792022216016
          entropy_coeff: 0.01
          kl: 0.01586250293409381
          policy_loss: -0.05380484594398329
          total_loss: 0.2339131554461315
          vf_explained_var: 0.7450557351112366
          vf_loss: 0.27771602658889233
    num_agent_steps_sampled: 199920
    num_agent_steps_trained: 199920
    num_steps_sampled: 199920
    num_steps_trained: 199920
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,20,2818.99,199920,1.48505,8.54,-1.87,93.5047




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 209916
  custom_metrics: {}
  date: 2021-11-07_15-03-03
  done: false
  episode_len_mean: 89.59821428571429
  episode_media: {}
  episode_reward_max: 8.290000000000017
  episode_reward_mean: 1.286875000000003
  episode_reward_min: -2.2399999999999993
  episodes_this_iter: 112
  episodes_total: 2203
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.59970557852688
          entropy_coeff: 0.01
          kl: 0.01636536260594722
          policy_loss: -0.05545460235279722
          total_loss: 0.22322561198956947
          vf_explained_var: 0.6840986609458923
          vf_loss: 0.26739492715997065
    num_agent_steps_sampled: 209916
    num_agent_steps_trained: 209916
    num_steps_sampled: 209916
    num_steps_trained: 209916
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,21,2973.33,209916,1.28688,8.29,-2.24,89.5982




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 219912
  custom_metrics: {}
  date: 2021-11-07_15-05-28
  done: false
  episode_len_mean: 93.37383177570094
  episode_media: {}
  episode_reward_max: 7.200000000000007
  episode_reward_mean: 1.7000000000000035
  episode_reward_min: -2.0700000000000007
  episodes_this_iter: 107
  episodes_total: 2310
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.6017333087758123
          entropy_coeff: 0.01
          kl: 0.015276478339103585
          policy_loss: -0.053364515661174415
          total_loss: 0.2223557772839235
          vf_explained_var: 0.7182268500328064
          vf_loss: 0.2669358967938739
    num_agent_steps_sampled: 219912
    num_agent_steps_trained: 219912
    num_steps_sampled: 219912
    num_steps_trained: 219912
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,22,3118.73,219912,1.7,7.2,-2.07,93.3738


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 229908
  custom_metrics: {}
  date: 2021-11-07_15-07-41
  done: false
  episode_len_mean: 94.49056603773585
  episode_media: {}
  episode_reward_max: 10.430000000000016
  episode_reward_mean: 1.7924528301886842
  episode_reward_min: -2.1200000000000006
  episodes_this_iter: 106
  episodes_total: 2416
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.6091130841491568
          entropy_coeff: 0.01
          kl: 0.015214492016119362
          policy_loss: -0.052206186439173345
          total_loss: 0.20765486498737437
          vf_explained_var: 0.7371765971183777
          vf_loss: 0.25129166699818567
    num_agent_steps_sampled: 229908
    num_agent_steps_trained: 229908
    num_steps_sampled: 229908
    num_steps_trained: 22990

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,23,3251.81,229908,1.79245,10.43,-2.12,94.4906




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 239904
  custom_metrics: {}
  date: 2021-11-07_15-10-10
  done: false
  episode_len_mean: 92.79439252336448
  episode_media: {}
  episode_reward_max: 6.880000000000013
  episode_reward_mean: 1.3263551401869202
  episode_reward_min: -2.24
  episodes_this_iter: 107
  episodes_total: 2523
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.6107060636210644
          entropy_coeff: 0.01
          kl: 0.013849373056023252
          policy_loss: -0.05776918463281586
          total_loss: 0.17764241255214835
          vf_explained_var: 0.7662463784217834
          vf_loss: 0.22996805449708915
    num_agent_steps_sampled: 239904
    num_agent_steps_trained: 239904
    num_steps_sampled: 239904
    num_steps_trained: 239904
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,24,3400.57,239904,1.32636,6.88,-2.24,92.7944




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 249900
  custom_metrics: {}
  date: 2021-11-07_15-12-37
  done: false
  episode_len_mean: 93.86915887850468
  episode_media: {}
  episode_reward_max: 8.840000000000009
  episode_reward_mean: 1.9255140186915933
  episode_reward_min: -1.7700000000000007
  episodes_this_iter: 107
  episodes_total: 2630
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.6094704676897096
          entropy_coeff: 0.01
          kl: 0.015078788866954924
          policy_loss: -0.058682930969402324
          total_loss: 0.19279801591864637
          vf_explained_var: 0.7444475889205933
          vf_loss: 0.24322428456865824
    num_agent_steps_sampled: 249900
    num_agent_steps_trained: 249900
    num_steps_sampled: 249900
    num_steps_trained: 249900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,25,3547.35,249900,1.92551,8.84,-1.77,93.8692


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 259896
  custom_metrics: {}
  date: 2021-11-07_15-14-47
  done: false
  episode_len_mean: 94.74528301886792
  episode_media: {}
  episode_reward_max: 6.760000000000014
  episode_reward_mean: 1.7713207547169856
  episode_reward_min: -1.940000000000001
  episodes_this_iter: 106
  episodes_total: 2736
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.6108538896609574
          entropy_coeff: 0.01
          kl: 0.01379613712697335
          policy_loss: -0.06169816500738136
          total_loss: 0.15544778294542916
          vf_explained_var: 0.7594130039215088
          vf_loss: 0.2118251613412912
    num_agent_steps_sampled: 259896
    num_agent_steps_trained: 259896
    num_steps_sampled: 259896
    num_steps_trained: 259896
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,26,3677.92,259896,1.77132,6.76,-1.94,94.7453


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 269892
  custom_metrics: {}
  date: 2021-11-07_15-17-01
  done: false
  episode_len_mean: 93.93396226415095
  episode_media: {}
  episode_reward_max: 7.040000000000008
  episode_reward_mean: 1.4718867924528343
  episode_reward_min: -2.0300000000000002
  episodes_this_iter: 106
  episodes_total: 2842
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5977021584144007
          entropy_coeff: 0.01
          kl: 0.01471472682495184
          policy_loss: -0.05705817122665099
          total_loss: 0.16943415715398952
          vf_explained_var: 0.758998453617096
          vf_loss: 0.21894736185860939
    num_agent_steps_sampled: 269892
    num_agent_steps_trained: 269892
    num_steps_sampled: 269892
    num_steps_trained: 269892
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,27,3811.54,269892,1.47189,7.04,-2.03,93.934




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 279888
  custom_metrics: {}
  date: 2021-11-07_15-19-46
  done: false
  episode_len_mean: 89.7090909090909
  episode_media: {}
  episode_reward_max: 10.450000000000017
  episode_reward_mean: 1.927181818181823
  episode_reward_min: -1.5900000000000005
  episodes_this_iter: 110
  episodes_total: 2952
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5961202947502464
          entropy_coeff: 0.01
          kl: 0.014860427716147399
          policy_loss: -0.059911946554341886
          total_loss: 0.15723997231763906
          vf_explained_var: 0.796622097492218
          vf_loss: 0.2092592091164273
    num_agent_steps_sampled: 279888
    num_agent_steps_trained: 279888
    num_steps_sampled: 279888
    num_steps_trained: 279888
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,28,3976.81,279888,1.92718,10.45,-1.59,89.7091




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 289884
  custom_metrics: {}
  date: 2021-11-07_15-22-27
  done: false
  episode_len_mean: 94.51886792452831
  episode_media: {}
  episode_reward_max: 8.65000000000001
  episode_reward_mean: 1.8350000000000048
  episode_reward_min: -1.840000000000001
  episodes_this_iter: 106
  episodes_total: 3058
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.606220671865675
          entropy_coeff: 0.01
          kl: 0.014981504349387097
          policy_loss: -0.06278664395213127
          total_loss: 0.15470395680103038
          vf_explained_var: 0.771485447883606
          vf_loss: 0.2094230675512654
    num_agent_steps_sampled: 289884
    num_agent_steps_trained: 289884
    num_steps_sampled: 289884
    num_steps_trained: 289884
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,29,4137.23,289884,1.835,8.65,-1.84,94.5189




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 299880
  custom_metrics: {}
  date: 2021-11-07_15-24-52
  done: false
  episode_len_mean: 94.28971962616822
  episode_media: {}
  episode_reward_max: 8.800000000000013
  episode_reward_mean: 1.85009345794393
  episode_reward_min: -1.780000000000001
  episodes_this_iter: 107
  episodes_total: 3165
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5891247506834505
          entropy_coeff: 0.01
          kl: 0.015358496355499707
          policy_loss: -0.059769737040703624
          total_loss: 0.1456004789704059
          vf_explained_var: 0.8247190117835999
          vf_loss: 0.19627288896749673
    num_agent_steps_sampled: 299880
    num_agent_steps_trained: 299880
    num_steps_sampled: 299880
    num_steps_trained: 299880
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,30,4282.63,299880,1.85009,8.8,-1.78,94.2897




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 309876
  custom_metrics: {}
  date: 2021-11-07_15-27-24
  done: false
  episode_len_mean: 91.87962962962963
  episode_media: {}
  episode_reward_max: 8.67000000000001
  episode_reward_mean: 1.9979629629629683
  episode_reward_min: -2.0600000000000005
  episodes_this_iter: 108
  episodes_total: 3273
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.596006913470407
          entropy_coeff: 0.01
          kl: 0.015571455984420897
          policy_loss: -0.06325977725796719
          total_loss: 0.1750385210960785
          vf_explained_var: 0.7946008443832397
          vf_loss: 0.22878464420572814
    num_agent_steps_sampled: 309876
    num_agent_steps_trained: 309876
    num_steps_sampled: 309876
    num_steps_trained: 309876
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,31,4434.73,309876,1.99796,8.67,-2.06,91.8796


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 319872
  custom_metrics: {}
  date: 2021-11-07_15-29-41
  done: false
  episode_len_mean: 95.31428571428572
  episode_media: {}
  episode_reward_max: 6.490000000000016
  episode_reward_mean: 1.691523809523814
  episode_reward_min: -2.04
  episodes_this_iter: 105
  episodes_total: 3378
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5799128216555993
          entropy_coeff: 0.01
          kl: 0.014030958009944609
          policy_loss: -0.06791372190142035
          total_loss: 0.1109447383791463
          vf_explained_var: 0.7991345524787903
          vf_loss: 0.1726933110282462
    num_agent_steps_sampled: 319872
    num_agent_steps_trained: 319872
    num_steps_sampled: 319872
    num_steps_trained: 319872
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,32,4571.22,319872,1.69152,6.49,-2.04,95.3143




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 329868
  custom_metrics: {}
  date: 2021-11-07_15-32-06
  done: false
  episode_len_mean: 95.04761904761905
  episode_media: {}
  episode_reward_max: 8.630000000000017
  episode_reward_mean: 1.5121904761904803
  episode_reward_min: -2.3599999999999968
  episodes_this_iter: 105
  episodes_total: 3483
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.590950656140971
          entropy_coeff: 0.01
          kl: 0.014188218601459362
          policy_loss: -0.07142950187986478
          total_loss: 0.09122342598170806
          vf_explained_var: 0.8201923370361328
          vf_loss: 0.1562398978548809
    num_agent_steps_sampled: 329868
    num_agent_steps_trained: 329868
    num_steps_sampled: 329868
    num_steps_trained: 329868
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,33,4716.72,329868,1.51219,8.63,-2.36,95.0476




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 339864
  custom_metrics: {}
  date: 2021-11-07_15-34-38
  done: false
  episode_len_mean: 91.8256880733945
  episode_media: {}
  episode_reward_max: 6.980000000000011
  episode_reward_mean: 1.6242201834862422
  episode_reward_min: -1.790000000000001
  episodes_this_iter: 109
  episodes_total: 3592
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.577089279737228
          entropy_coeff: 0.01
          kl: 0.014481722452376226
          policy_loss: -0.0660507208468695
          total_loss: 0.11527525235174431
          vf_explained_var: 0.7954672574996948
          vf_loss: 0.1741056910644357
    num_agent_steps_sampled: 339864
    num_agent_steps_trained: 339864
    num_steps_sampled: 339864
    num_steps_trained: 339864
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,34,4868.59,339864,1.62422,6.98,-1.79,91.8257


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 349860
  custom_metrics: {}
  date: 2021-11-07_15-36-49
  done: false
  episode_len_mean: 96.83495145631068
  episode_media: {}
  episode_reward_max: 8.980000000000011
  episode_reward_mean: 2.0331067961165106
  episode_reward_min: -1.9500000000000013
  episodes_this_iter: 103
  episodes_total: 3695
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.577457818414411
          entropy_coeff: 0.01
          kl: 0.016689053047347937
          policy_loss: -0.06761236127752523
          total_loss: 0.12520136356942801
          vf_explained_var: 0.8258298635482788
          vf_loss: 0.1805685537795608
    num_agent_steps_sampled: 349860
    num_agent_steps_trained: 349860
    num_steps_sampled: 349860
    num_steps_trained: 349860
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,35,4998.99,349860,2.03311,8.98,-1.95,96.835


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 359856
  custom_metrics: {}
  date: 2021-11-07_15-39-02
  done: false
  episode_len_mean: 95.87619047619047
  episode_media: {}
  episode_reward_max: 8.920000000000014
  episode_reward_mean: 1.9503809523809572
  episode_reward_min: -1.850000000000001
  episodes_this_iter: 105
  episodes_total: 3800
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5778674042122995
          entropy_coeff: 0.01
          kl: 0.013657537506351576
          policy_loss: -0.0714271578953689
          total_loss: 0.09254649960730447
          vf_explained_var: 0.844637393951416
          vf_loss: 0.15863875422594895
    num_agent_steps_sampled: 359856
    num_agent_steps_trained: 359856
    num_steps_sampled: 359856
    num_steps_trained: 359856
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,36,5132.65,359856,1.95038,8.92,-1.85,95.8762




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 369852
  custom_metrics: {}
  date: 2021-11-07_15-41-28
  done: false
  episode_len_mean: 95.04716981132076
  episode_media: {}
  episode_reward_max: 10.740000000000013
  episode_reward_mean: 1.6333018867924571
  episode_reward_min: -2.0999999999999996
  episodes_this_iter: 106
  episodes_total: 3906
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5804361274099756
          entropy_coeff: 0.01
          kl: 0.013561249009486423
          policy_loss: -0.07196720903938333
          total_loss: 0.08177560055429418
          vf_explained_var: 0.8351461887359619
          vf_loss: 0.14865294955670832
    num_agent_steps_sampled: 369852
    num_agent_steps_trained: 369852
    num_steps_sampled: 369852
    num_steps_trained: 369852

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,37,5278.52,369852,1.6333,10.74,-2.1,95.0472




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 379848
  custom_metrics: {}
  date: 2021-11-07_15-43-54
  done: false
  episode_len_mean: 96.24271844660194
  episode_media: {}
  episode_reward_max: 8.750000000000016
  episode_reward_mean: 2.2839805825242787
  episode_reward_min: -1.5700000000000007
  episodes_this_iter: 103
  episodes_total: 4009
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5801565744937993
          entropy_coeff: 0.01
          kl: 0.0159658019011001
          policy_loss: -0.06857081058506782
          total_loss: 0.106750553755615
          vf_explained_var: 0.8618919253349304
          vf_loss: 0.16475083600156581
    num_agent_steps_sampled: 379848
    num_agent_steps_trained: 379848
    num_steps_sampled: 379848
    num_steps_trained: 379848
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,38,5424.5,379848,2.28398,8.75,-1.57,96.2427


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 389844
  custom_metrics: {}
  date: 2021-11-07_15-46-08
  done: false
  episode_len_mean: 95.36190476190477
  episode_media: {}
  episode_reward_max: 8.560000000000015
  episode_reward_mean: 2.008952380952386
  episode_reward_min: -2.1499999999999977
  episodes_this_iter: 105
  episodes_total: 4114
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5671299068336815
          entropy_coeff: 0.01
          kl: 0.01662269888277713
          policy_loss: -0.06767251343251421
          total_loss: 0.14815283579608568
          vf_explained_var: 0.8191425204277039
          vf_loss: 0.20362806187735663
    num_agent_steps_sampled: 389844
    num_agent_steps_trained: 389844
    num_steps_sampled: 389844
    num_steps_trained: 389844
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,39,5557.91,389844,2.00895,8.56,-2.15,95.3619




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 399840
  custom_metrics: {}
  date: 2021-11-07_15-48-33
  done: false
  episode_len_mean: 94.22641509433963
  episode_media: {}
  episode_reward_max: 8.270000000000016
  episode_reward_mean: 2.0293396226415146
  episode_reward_min: -1.7900000000000011
  episodes_this_iter: 106
  episodes_total: 4220
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.567954599347889
          entropy_coeff: 0.01
          kl: 0.015875299987843195
          policy_loss: -0.0676957944010058
          total_loss: 0.15062292527455168
          vf_explained_var: 0.8293734788894653
          vf_loss: 0.20783234760165215
    num_agent_steps_sampled: 399840
    num_agent_steps_trained: 399840
    num_steps_sampled: 399840
    num_steps_trained: 399840
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,40,5703.09,399840,2.02934,8.27,-1.79,94.2264




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 409836
  custom_metrics: {}
  date: 2021-11-07_15-51-12
  done: false
  episode_len_mean: 93.01851851851852
  episode_media: {}
  episode_reward_max: 8.750000000000014
  episode_reward_mean: 1.9288888888888946
  episode_reward_min: -1.800000000000001
  episodes_this_iter: 108
  episodes_total: 4328
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5520285384267822
          entropy_coeff: 0.01
          kl: 0.01528005583188881
          policy_loss: -0.06972825568264876
          total_loss: 0.11499245790287088
          vf_explained_var: 0.8531769514083862
          vf_loss: 0.1754311204673006
    num_agent_steps_sampled: 409836
    num_agent_steps_trained: 409836
    num_steps_sampled: 409836
    num_steps_trained: 409836
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,41,5861.93,409836,1.92889,8.75,-1.8,93.0185


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 419832
  custom_metrics: {}
  date: 2021-11-07_15-53-27
  done: false
  episode_len_mean: 94.5904761904762
  episode_media: {}
  episode_reward_max: 8.83000000000001
  episode_reward_mean: 1.9240952380952427
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 105
  episodes_total: 4433
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.561146357935718
          entropy_coeff: 0.01
          kl: 0.014464100696988828
          policy_loss: -0.07120111835881686
          total_loss: 0.10623972564617283
          vf_explained_var: 0.8271787166595459
          vf_loss: 0.17010127793615445
    num_agent_steps_sampled: 419832
    num_agent_steps_trained: 419832
    num_steps_sampled: 419832
    num_steps_trained: 419832
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,42,5996.8,419832,1.9241,8.83,-2.03,94.5905




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 429828
  custom_metrics: {}
  date: 2021-11-07_15-55-53
  done: false
  episode_len_mean: 95.48076923076923
  episode_media: {}
  episode_reward_max: 10.460000000000013
  episode_reward_mean: 1.8645192307692364
  episode_reward_min: -1.950000000000001
  episodes_this_iter: 104
  episodes_total: 4537
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5704195495344635
          entropy_coeff: 0.01
          kl: 0.014213499088408391
          policy_loss: -0.07241350992415578
          total_loss: 0.08840426810754415
          vf_explained_var: 0.8568517565727234
          vf_loss: 0.15414184471512707
    num_agent_steps_sampled: 429828
    num_agent_steps_trained: 429828
    num_steps_sampled: 429828
    num_steps_trained: 429828


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,43,6142.84,429828,1.86452,10.46,-1.95,95.4808




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 439824
  custom_metrics: {}
  date: 2021-11-07_15-58-20
  done: false
  episode_len_mean: 93.24074074074075
  episode_media: {}
  episode_reward_max: 7.000000000000011
  episode_reward_mean: 1.8581481481481532
  episode_reward_min: -2.040000000000001
  episodes_this_iter: 108
  episodes_total: 4645
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5593346715992333
          entropy_coeff: 0.01
          kl: 0.014686770218649028
          policy_loss: -0.07193645442812106
          total_loss: 0.08329966200881789
          vf_explained_var: 0.8537670969963074
          vf_loss: 0.147371164492817
    num_agent_steps_sampled: 439824
    num_agent_steps_trained: 439824
    num_steps_sampled: 439824
    num_steps_trained: 439824
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,44,6289.52,439824,1.85815,7,-2.04,93.2407


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 449820
  custom_metrics: {}
  date: 2021-11-07_16-00-39
  done: false
  episode_len_mean: 94.82857142857142
  episode_media: {}
  episode_reward_max: 8.240000000000014
  episode_reward_mean: 1.5230476190476232
  episode_reward_min: -1.8500000000000008
  episodes_this_iter: 105
  episodes_total: 4750
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.56350114080641
          entropy_coeff: 0.01
          kl: 0.014893604130610746
          policy_loss: -0.07071394389813654
          total_loss: 0.09965109720061986
          vf_explained_var: 0.8395570516586304
          vf_loss: 0.16207055969912018
    num_agent_steps_sampled: 449820
    num_agent_steps_trained: 449820
    num_steps_sampled: 449820
    num_steps_trained: 449820
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,45,6428.66,449820,1.52305,8.24,-1.85,94.8286


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 459816
  custom_metrics: {}
  date: 2021-11-07_16-02-58
  done: false
  episode_len_mean: 94.95283018867924
  episode_media: {}
  episode_reward_max: 6.730000000000014
  episode_reward_mean: 1.7146226415094385
  episode_reward_min: -2.219999999999999
  episodes_this_iter: 106
  episodes_total: 4856
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.564808814953535
          entropy_coeff: 0.01
          kl: 0.015036150505772602
          policy_loss: -0.07232849399248759
          total_loss: 0.10650955833590184
          vf_explained_var: 0.817345380783081
          vf_loss: 0.17023190990981893
    num_agent_steps_sampled: 459816
    num_agent_steps_trained: 459816
    num_steps_sampled: 459816
    num_steps_trained: 459816
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,46,6568.21,459816,1.71462,6.73,-2.22,94.9528




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 469812
  custom_metrics: {}
  date: 2021-11-07_16-05-32
  done: false
  episode_len_mean: 93.1588785046729
  episode_media: {}
  episode_reward_max: 6.490000000000016
  episode_reward_mean: 2.128411214953277
  episode_reward_min: -1.760000000000001
  episodes_this_iter: 107
  episodes_total: 4963
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5591368204508074
          entropy_coeff: 0.01
          kl: 0.013272293279782063
          policy_loss: -0.07708250068796751
          total_loss: 0.06380885333682482
          vf_explained_var: 0.881742537021637
          vf_loss: 0.13624677850076786
    num_agent_steps_sampled: 469812
    num_agent_steps_trained: 469812
    num_steps_sampled: 469812
    num_steps_trained: 469812
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,47,6721.46,469812,2.12841,6.49,-1.76,93.1589




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 479808
  custom_metrics: {}
  date: 2021-11-07_16-08-14
  done: false
  episode_len_mean: 90.80733944954129
  episode_media: {}
  episode_reward_max: 10.380000000000011
  episode_reward_mean: 2.0915596330275275
  episode_reward_min: -1.8200000000000012
  episodes_this_iter: 109
  episodes_total: 5072
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5439931936753104
          entropy_coeff: 0.01
          kl: 0.014731597551140304
          policy_loss: -0.06985943971329138
          total_loss: 0.09755601076743542
          vf_explained_var: 0.8305252194404602
          vf_loss: 0.15929496026414836
    num_agent_steps_sampled: 479808
    num_agent_steps_trained: 479808
    num_steps_sampled: 479808
    num_steps_trained: 479808

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,48,6883.79,479808,2.09156,10.38,-1.82,90.8073


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 489804
  custom_metrics: {}
  date: 2021-11-07_16-10-29
  done: false
  episode_len_mean: 94.27102803738318
  episode_media: {}
  episode_reward_max: 8.390000000000008
  episode_reward_mean: 2.112803738317763
  episode_reward_min: -1.8000000000000007
  episodes_this_iter: 107
  episodes_total: 5179
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5340808964183186
          entropy_coeff: 0.01
          kl: 0.015145745135985541
          policy_loss: -0.07466301664582685
          total_loss: 0.1092842457219003
          vf_explained_var: 0.8428395390510559
          vf_loss: 0.17478416958011878
    num_agent_steps_sampled: 489804
    num_agent_steps_trained: 489804
    num_steps_sampled: 489804
    num_steps_trained: 489804
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,49,7018.57,489804,2.1128,8.39,-1.8,94.271




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 499800
  custom_metrics: {}
  date: 2021-11-07_16-12-57
  done: false
  episode_len_mean: 93.23584905660377
  episode_media: {}
  episode_reward_max: 7.180000000000012
  episode_reward_mean: 2.1808490566037797
  episode_reward_min: -1.9100000000000008
  episodes_this_iter: 106
  episodes_total: 5285
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.545594003261664
          entropy_coeff: 0.01
          kl: 0.014180963433161142
          policy_loss: -0.0785960048628159
          total_loss: 0.07677128519066888
          vf_explained_var: 0.8591193556785583
          vf_loss: 0.1485172223458942
    num_agent_steps_sampled: 499800
    num_agent_steps_trained: 499800
    num_steps_sampled: 499800
    num_steps_trained: 499800
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,50,7166.31,499800,2.18085,7.18,-1.91,93.2358




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 509796
  custom_metrics: {}
  date: 2021-11-07_16-15-36
  done: false
  episode_len_mean: 91.33333333333333
  episode_media: {}
  episode_reward_max: 6.870000000000009
  episode_reward_mean: 1.8480180180180235
  episode_reward_min: -1.980000000000001
  episodes_this_iter: 111
  episodes_total: 5396
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.532992374387562
          entropy_coeff: 0.01
          kl: 0.014236835710875905
          policy_loss: -0.07746718386108549
          total_loss: 0.07171642978667704
          vf_explained_var: 0.8644962906837463
          vf_loss: 0.14208024507468073
    num_agent_steps_sampled: 509796
    num_agent_steps_trained: 509796
    num_steps_sampled: 509796
    num_steps_trained: 509796
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,51,7325.79,509796,1.84802,6.87,-1.98,91.3333


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 519792
  custom_metrics: {}
  date: 2021-11-07_16-17-59
  done: false
  episode_len_mean: 91.18181818181819
  episode_media: {}
  episode_reward_max: 8.620000000000015
  episode_reward_mean: 2.0980909090909146
  episode_reward_min: -1.8600000000000008
  episodes_this_iter: 110
  episodes_total: 5506
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.524516623244326
          entropy_coeff: 0.01
          kl: 0.015742568719724428
          policy_loss: -0.07358359354269556
          total_loss: 0.09564714084307735
          vf_explained_var: 0.8636862635612488
          vf_loss: 0.15861236117461808
    num_agent_steps_sampled: 519792
    num_agent_steps_trained: 519792
    num_steps_sampled: 519792
    num_steps_trained: 519792
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,52,7468.5,519792,2.09809,8.62,-1.86,91.1818




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 529788
  custom_metrics: {}
  date: 2021-11-07_16-20-33
  done: false
  episode_len_mean: 91.49074074074075
  episode_media: {}
  episode_reward_max: 10.430000000000016
  episode_reward_mean: 2.2202777777777833
  episode_reward_min: -2.1299999999999994
  episodes_this_iter: 108
  episodes_total: 5614
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.527949186675569
          entropy_coeff: 0.01
          kl: 0.014769316274662285
          policy_loss: -0.07696815397845119
          total_loss: 0.08713866675065624
          vf_explained_var: 0.8555226922035217
          vf_loss: 0.15573996261128376
    num_agent_steps_sampled: 529788
    num_agent_steps_trained: 529788
    num_steps_sampled: 529788
    num_steps_trained: 529788


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,53,7622.48,529788,2.22028,10.43,-2.13,91.4907




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 539784
  custom_metrics: {}
  date: 2021-11-07_16-23-07
  done: false
  episode_len_mean: 90.61261261261261
  episode_media: {}
  episode_reward_max: 8.600000000000016
  episode_reward_mean: 2.196396396396401
  episode_reward_min: -1.870000000000001
  episodes_this_iter: 111
  episodes_total: 5725
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5242018518284857
          entropy_coeff: 0.01
          kl: 0.014813252982191945
          policy_loss: -0.07621061086463622
          total_loss: 0.08345162549541674
          vf_explained_var: 0.8602564930915833
          vf_loss: 0.15115781093223227
    num_agent_steps_sampled: 539784
    num_agent_steps_trained: 539784
    num_steps_sampled: 539784
    num_steps_trained: 539784
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,54,7776.38,539784,2.1964,8.6,-1.87,90.6126




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 549780
  custom_metrics: {}
  date: 2021-11-07_16-25-35
  done: false
  episode_len_mean: 91.39090909090909
  episode_media: {}
  episode_reward_max: 8.780000000000015
  episode_reward_mean: 2.0540909090909145
  episode_reward_min: -1.8500000000000008
  episodes_this_iter: 110
  episodes_total: 5835
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.522494824727376
          entropy_coeff: 0.01
          kl: 0.0140641469732893
          policy_loss: -0.07968450096141325
          total_loss: 0.07265520009220156
          vf_explained_var: 0.8725035190582275
          vf_loss: 0.14552476265205022
    num_agent_steps_sampled: 549780
    num_agent_steps_trained: 549780
    num_steps_sampled: 549780
    num_steps_trained: 549780
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,55,7924.3,549780,2.05409,8.78,-1.85,91.3909


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 559776
  custom_metrics: {}
  date: 2021-11-07_16-27-55
  done: false
  episode_len_mean: 91.71296296296296
  episode_media: {}
  episode_reward_max: 8.800000000000015
  episode_reward_mean: 2.191388888888895
  episode_reward_min: -1.890000000000001
  episodes_this_iter: 108
  episodes_total: 5943
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.53522239281581
          entropy_coeff: 0.01
          kl: 0.015464562596789137
          policy_loss: -0.07699231628296721
          total_loss: 0.0968958381547505
          vf_explained_var: 0.8440979719161987
          vf_loss: 0.1640101716384037
    num_agent_steps_sampled: 559776
    num_agent_steps_trained: 559776
    num_steps_sampled: 559776
    num_steps_trained: 559776
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,56,8064.14,559776,2.19139,8.8,-1.89,91.713




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 569772
  custom_metrics: {}
  date: 2021-11-07_16-30-39
  done: false
  episode_len_mean: 90.17117117117117
  episode_media: {}
  episode_reward_max: 8.750000000000012
  episode_reward_mean: 2.180180180180186
  episode_reward_min: -2.1899999999999946
  episodes_this_iter: 111
  episodes_total: 6054
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.529910491266821
          entropy_coeff: 0.01
          kl: 0.014673865935924763
          policy_loss: -0.07442047044029858
          total_loss: 0.08197846586664773
          vf_explained_var: 0.8836137652397156
          vf_loss: 0.14826914014883785
    num_agent_steps_sampled: 569772
    num_agent_steps_trained: 569772
    num_steps_sampled: 569772
    num_steps_trained: 569772
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,57,8228.79,569772,2.18018,8.75,-2.19,90.1712




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 579768
  custom_metrics: {}
  date: 2021-11-07_16-33-12
  done: false
  episode_len_mean: 91.69090909090909
  episode_media: {}
  episode_reward_max: 14.060000000000016
  episode_reward_mean: 1.8883636363636416
  episode_reward_min: -2.4199999999999955
  episodes_this_iter: 110
  episodes_total: 6164
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.546999739581703
          entropy_coeff: 0.01
          kl: 0.01514762145677973
          policy_loss: -0.07697513151165639
          total_loss: 0.07620279411904705
          vf_explained_var: 0.8709275722503662
          vf_loss: 0.1441397468599244
    num_agent_steps_sampled: 579768
    num_agent_steps_trained: 579768
    num_steps_sampled: 579768
    num_steps_trained: 579768
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,58,8381.05,579768,1.88836,14.06,-2.42,91.6909


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 589764
  custom_metrics: {}
  date: 2021-11-07_16-35-30
  done: false
  episode_len_mean: 91.91588785046729
  episode_media: {}
  episode_reward_max: 6.670000000000012
  episode_reward_mean: 1.9292523364486036
  episode_reward_min: -2.0200000000000005
  episodes_this_iter: 107
  episodes_total: 6271
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5469099193556697
          entropy_coeff: 0.01
          kl: 0.013852522687118242
          policy_loss: -0.08131109522974007
          total_loss: 0.05720965184239495
          vf_explained_var: 0.8883196711540222
          vf_loss: 0.13243206675427083
    num_agent_steps_sampled: 589764
    num_agent_steps_trained: 589764
    num_steps_sampled: 589764
    num_steps_trained: 589764


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,59,8519.48,589764,1.92925,6.67,-2.02,91.9159




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 599760
  custom_metrics: {}
  date: 2021-11-07_16-37-59
  done: false
  episode_len_mean: 90.58558558558559
  episode_media: {}
  episode_reward_max: 8.78000000000001
  episode_reward_mean: 1.8138738738738784
  episode_reward_min: -1.9500000000000006
  episodes_this_iter: 111
  episodes_total: 6382
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5309097277812467
          entropy_coeff: 0.01
          kl: 0.015351514849588018
          policy_loss: -0.07559093122617301
          total_loss: 0.08376154069287273
          vf_explained_var: 0.8537603616714478
          vf_loss: 0.14968889787729478
    num_agent_steps_sampled: 599760
    num_agent_steps_trained: 599760
    num_steps_sampled: 599760
    num_steps_trained: 599760
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,60,8668.34,599760,1.81387,8.78,-1.95,90.5856




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 609756
  custom_metrics: {}
  date: 2021-11-07_16-40-45
  done: false
  episode_len_mean: 90.49549549549549
  episode_media: {}
  episode_reward_max: 10.550000000000013
  episode_reward_mean: 2.0107207207207254
  episode_reward_min: -1.8200000000000005
  episodes_this_iter: 111
  episodes_total: 6493
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5183654834062623
          entropy_coeff: 0.01
          kl: 0.014342680566373276
          policy_loss: -0.0780673203846583
          total_loss: 0.06909490151123868
          vf_explained_var: 0.876209557056427
          vf_loss: 0.1396714557837854
    num_agent_steps_sampled: 609756
    num_agent_steps_trained: 609756
    num_steps_sampled: 609756
    num_steps_trained: 609756
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,61,8834.52,609756,2.01072,10.55,-1.82,90.4955


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 619752
  custom_metrics: {}
  date: 2021-11-07_16-43-02
  done: false
  episode_len_mean: 91.36697247706422
  episode_media: {}
  episode_reward_max: 8.800000000000011
  episode_reward_mean: 1.7664220183486277
  episode_reward_min: -1.9700000000000009
  episodes_this_iter: 109
  episodes_total: 6602
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5197524671880607
          entropy_coeff: 0.01
          kl: 0.014416355685275108
          policy_loss: -0.07902193694797337
          total_loss: 0.07330178211912768
          vf_explained_var: 0.845158040523529
          vf_loss: 0.1446789818059685
    num_agent_steps_sampled: 619752
    num_agent_steps_trained: 619752
    num_steps_sampled: 619752
    num_steps_trained: 619752
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,62,8970.92,619752,1.76642,8.8,-1.97,91.367




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 629748
  custom_metrics: {}
  date: 2021-11-07_16-45-29
  done: false
  episode_len_mean: 93.0
  episode_media: {}
  episode_reward_max: 6.880000000000011
  episode_reward_mean: 2.093364485981314
  episode_reward_min: -1.930000000000001
  episodes_this_iter: 107
  episodes_total: 6709
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.530753030328669
          entropy_coeff: 0.01
          kl: 0.015146882652015562
          policy_loss: -0.07482523645600701
          total_loss: 0.09030468026255695
          vf_explained_var: 0.8503482937812805
          vf_loss: 0.15593095369732532
    num_agent_steps_sampled: 629748
    num_agent_steps_trained: 629748
    num_steps_sampled: 629748
    num_steps_trained: 629748
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,63,9117.96,629748,2.09336,6.88,-1.93,93




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 639744
  custom_metrics: {}
  date: 2021-11-07_16-48-10
  done: false
  episode_len_mean: 92.1574074074074
  episode_media: {}
  episode_reward_max: 10.600000000000014
  episode_reward_mean: 2.065833333333338
  episode_reward_min: -1.9500000000000008
  episodes_this_iter: 108
  episodes_total: 6817
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5338203717500734
          entropy_coeff: 0.01
          kl: 0.014881890157984329
          policy_loss: -0.07909329196151632
          total_loss: 0.07051155927360185
          vf_explained_var: 0.8739203810691833
          vf_loss: 0.14104024731577972
    num_agent_steps_sampled: 639744
    num_agent_steps_trained: 639744
    num_steps_sampled: 639744
    num_steps_trained: 639744
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,64,9279.41,639744,2.06583,10.6,-1.95,92.1574




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 649740
  custom_metrics: {}
  date: 2021-11-07_16-50-47
  done: false
  episode_len_mean: 92.31818181818181
  episode_media: {}
  episode_reward_max: 10.980000000000013
  episode_reward_mean: 2.319272727272733
  episode_reward_min: -1.800000000000001
  episodes_this_iter: 110
  episodes_total: 6927
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.510494092794565
          entropy_coeff: 0.01
          kl: 0.015082894544785222
          policy_loss: -0.0758405262652116
          total_loss: 0.0986602287245994
          vf_explained_var: 0.8823567628860474
          vf_loss: 0.1652449752864802
    num_agent_steps_sampled: 649740
    num_agent_steps_trained: 649740
    num_steps_sampled: 649740
    num_steps_trained: 649740
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,65,9436.17,649740,2.31927,10.98,-1.8,92.3182


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 659736
  custom_metrics: {}
  date: 2021-11-07_16-53-08
  done: false
  episode_len_mean: 92.66666666666667
  episode_media: {}
  episode_reward_max: 8.910000000000013
  episode_reward_mean: 1.9100925925925976
  episode_reward_min: -2.0300000000000002
  episodes_this_iter: 108
  episodes_total: 7035
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.519284813424461
          entropy_coeff: 0.01
          kl: 0.013587904436048188
          policy_loss: -0.08074892077786036
          total_loss: 0.04687129759715281
          vf_explained_var: 0.885489821434021
          vf_loss: 0.12185812075025378
    num_agent_steps_sampled: 659736
    num_agent_steps_trained: 659736
    num_steps_sampled: 659736
    num_steps_trained: 659736
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,66,9577.07,659736,1.91009,8.91,-2.03,92.6667




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 669732
  custom_metrics: {}
  date: 2021-11-07_16-55-49
  done: false
  episode_len_mean: 92.19626168224299
  episode_media: {}
  episode_reward_max: 8.700000000000014
  episode_reward_mean: 1.826261682242995
  episode_reward_min: -1.930000000000001
  episodes_this_iter: 107
  episodes_total: 7142
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5149064901547553
          entropy_coeff: 0.01
          kl: 0.01448949894752467
          policy_loss: -0.07940300489879316
          total_loss: 0.07568906313683997
          vf_explained_var: 0.8735856413841248
          vf_loss: 0.1472322430859646
    num_agent_steps_sampled: 669732
    num_agent_steps_trained: 669732
    num_steps_sampled: 669732
    num_steps_trained: 669732
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,67,9737.75,669732,1.82626,8.7,-1.93,92.1963




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 679728
  custom_metrics: {}
  date: 2021-11-07_16-58-23
  done: false
  episode_len_mean: 93.06542056074767
  episode_media: {}
  episode_reward_max: 8.970000000000011
  episode_reward_mean: 2.1183177570093514
  episode_reward_min: -2.1000000000000005
  episodes_this_iter: 107
  episodes_total: 7249
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.526982937511216
          entropy_coeff: 0.01
          kl: 0.013811302593073088
          policy_loss: -0.07886498366347236
          total_loss: 0.07247549689008703
          vf_explained_var: 0.8864032626152039
          vf_loss: 0.14514643417623563
    num_agent_steps_sampled: 679728
    num_agent_steps_trained: 679728
    num_steps_sampled: 679728
    num_steps_trained: 679728
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,68,9892.5,679728,2.11832,8.97,-2.1,93.0654


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 689724
  custom_metrics: {}
  date: 2021-11-07_17-00-39
  done: false
  episode_len_mean: 94.75471698113208
  episode_media: {}
  episode_reward_max: 8.80000000000002
  episode_reward_mean: 1.9030188679245326
  episode_reward_min: -1.760000000000001
  episodes_this_iter: 106
  episodes_total: 7355
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5305906124604056
          entropy_coeff: 0.01
          kl: 0.014205923328810084
          policy_loss: -0.08023752068830096
          total_loss: 0.06980001435129561
          vf_explained_var: 0.8702053427696228
          vf_loss: 0.1429805718999133
    num_agent_steps_sampled: 689724
    num_agent_steps_trained: 689724
    num_steps_sampled: 689724
    num_steps_trained: 689724
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,69,10028.4,689724,1.90302,8.8,-1.76,94.7547




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 699720
  custom_metrics: {}
  date: 2021-11-07_17-03-05
  done: false
  episode_len_mean: 94.23364485981308
  episode_media: {}
  episode_reward_max: 8.770000000000014
  episode_reward_mean: 1.759158878504677
  episode_reward_min: -2.000000000000001
  episodes_this_iter: 107
  episodes_total: 7462
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.524011458494724
          entropy_coeff: 0.01
          kl: 0.014640452362593316
          policy_loss: -0.07918056072746842
          total_loss: 0.07342005537138281
          vf_explained_var: 0.8598913550376892
          vf_loss: 0.1444879491901041
    num_agent_steps_sampled: 699720
    num_agent_steps_trained: 699720
    num_steps_sampled: 699720
    num_steps_trained: 699720
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,70,10174.2,699720,1.75916,8.77,-2,94.2336




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 709716
  custom_metrics: {}
  date: 2021-11-07_17-05-34
  done: false
  episode_len_mean: 94.49056603773585
  episode_media: {}
  episode_reward_max: 9.130000000000013
  episode_reward_mean: 1.71660377358491
  episode_reward_min: -1.9000000000000008
  episodes_this_iter: 106
  episodes_total: 7568
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5306258757909137
          entropy_coeff: 0.01
          kl: 0.01386904838319247
          policy_loss: -0.08397089938959505
          total_loss: 0.06388328278469096
          vf_explained_var: 0.8584480285644531
          vf_loss: 0.141565014472884
    num_agent_steps_sampled: 709716
    num_agent_steps_trained: 709716
    num_steps_sampled: 709716
    num_steps_trained: 709716
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,71,10322.7,709716,1.7166,9.13,-1.9,94.4906




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 719712
  custom_metrics: {}
  date: 2021-11-07_17-08-03
  done: false
  episode_len_mean: 92.74766355140187
  episode_media: {}
  episode_reward_max: 10.710000000000015
  episode_reward_mean: 1.7808411214953312
  episode_reward_min: -1.810000000000001
  episodes_this_iter: 107
  episodes_total: 7675
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5413423642134054
          entropy_coeff: 0.01
          kl: 0.014785782154942233
          policy_loss: -0.07929552043827935
          total_loss: 0.06731085311780628
          vf_explained_var: 0.8605018258094788
          vf_loss: 0.13833593727750146
    num_agent_steps_sampled: 719712
    num_agent_steps_trained: 719712
    num_steps_sampled: 719712
    num_steps_trained: 719712


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,72,10471.5,719712,1.78084,10.71,-1.81,92.7477




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 729708
  custom_metrics: {}
  date: 2021-11-07_17-10-37
  done: false
  episode_len_mean: 93.03703703703704
  episode_media: {}
  episode_reward_max: 8.78000000000001
  episode_reward_mean: 1.7663888888888934
  episode_reward_min: -1.790000000000001
  episodes_this_iter: 108
  episodes_total: 7783
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5265426352492764
          entropy_coeff: 0.01
          kl: 0.013440546279408579
          policy_loss: -0.08088439668281976
          total_loss: 0.0549425174577687
          vf_explained_var: 0.8674768209457397
          vf_loss: 0.13047309586794203
    num_agent_steps_sampled: 729708
    num_agent_steps_trained: 729708
    num_steps_sampled: 729708
    num_steps_trained: 729708
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,73,10625.6,729708,1.76639,8.78,-1.79,93.037




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 739704
  custom_metrics: {}
  date: 2021-11-07_17-13-47
  done: false
  episode_len_mean: 91.28703703703704
  episode_media: {}
  episode_reward_max: 8.800000000000013
  episode_reward_mean: 2.1212962962963013
  episode_reward_min: -1.7600000000000007
  episodes_this_iter: 108
  episodes_total: 7891
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.528597446384593
          entropy_coeff: 0.01
          kl: 0.014160518187238165
          policy_loss: -0.07915605819728384
          total_loss: 0.06300719374090306
          vf_explained_var: 0.8916653394699097
          vf_loss: 0.1351897936194944
    num_agent_steps_sampled: 739704
    num_agent_steps_trained: 739704
    num_steps_sampled: 739704
    num_steps_trained: 739704
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,74,10815.4,739704,2.1213,8.8,-1.76,91.287




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 749700
  custom_metrics: {}
  date: 2021-11-07_17-16-19
  done: false
  episode_len_mean: 91.95454545454545
  episode_media: {}
  episode_reward_max: 6.760000000000014
  episode_reward_mean: 1.704909090909095
  episode_reward_min: -1.860000000000001
  episodes_this_iter: 110
  episodes_total: 8001
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.538535209191151
          entropy_coeff: 0.01
          kl: 0.014779410925070156
          policy_loss: -0.08082136617272966
          total_loss: 0.07033325833642584
          vf_explained_var: 0.8665417432785034
          vf_loss: 0.14287063036050296
    num_agent_steps_sampled: 749700
    num_agent_steps_trained: 749700
    num_steps_sampled: 749700
    num_steps_trained: 749700
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,75,10968,749700,1.70491,6.76,-1.86,91.9545


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 759696
  custom_metrics: {}
  date: 2021-11-07_17-18-40
  done: false
  episode_len_mean: 91.66055045871559
  episode_media: {}
  episode_reward_max: 8.99000000000001
  episode_reward_mean: 2.1983486238532155
  episode_reward_min: -1.5000000000000004
  episodes_this_iter: 109
  episodes_total: 8110
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.511059343509185
          entropy_coeff: 0.01
          kl: 0.014435387863285234
          policy_loss: -0.07876281150354025
          total_loss: 0.08068958955984085
          vf_explained_var: 0.8731030821800232
          vf_loss: 0.15167737532343364
    num_agent_steps_sampled: 759696
    num_agent_steps_trained: 759696
    num_steps_sampled: 759696
    num_steps_trained: 759696
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,76,11108.2,759696,2.19835,8.99,-1.5,91.6606




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 769692
  custom_metrics: {}
  date: 2021-11-07_17-21-11
  done: false
  episode_len_mean: 92.58878504672897
  episode_media: {}
  episode_reward_max: 8.870000000000012
  episode_reward_mean: 1.958130841121499
  episode_reward_min: -2.0300000000000002
  episodes_this_iter: 107
  episodes_total: 8217
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.516046618192624
          entropy_coeff: 0.01
          kl: 0.013544636572014487
          policy_loss: -0.08118102865484662
          total_loss: 0.05955630886790335
          vf_explained_var: 0.868785560131073
          vf_loss: 0.1350414258101557
    num_agent_steps_sampled: 769692
    num_agent_steps_trained: 769692
    num_steps_sampled: 769692
    num_steps_trained: 769692
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,77,11259.2,769692,1.95813,8.87,-2.03,92.5888




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 779688
  custom_metrics: {}
  date: 2021-11-07_17-24-18
  done: false
  episode_len_mean: 88.69026548672566
  episode_media: {}
  episode_reward_max: 10.400000000000015
  episode_reward_mean: 2.132035398230093
  episode_reward_min: -1.9200000000000008
  episodes_this_iter: 113
  episodes_total: 8330
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.511643094282884
          entropy_coeff: 0.01
          kl: 0.014452935020588073
          policy_loss: -0.07772132913367107
          total_loss: 0.08762711850273558
          vf_explained_var: 0.8858755230903625
          vf_loss: 0.15753928490588043
    num_agent_steps_sampled: 779688
    num_agent_steps_trained: 779688
    num_steps_sampled: 779688
    num_steps_trained: 779688
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,78,11446.6,779688,2.13204,10.4,-1.92,88.6903




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 789684
  custom_metrics: {}
  date: 2021-11-07_17-26-51
  done: false
  episode_len_mean: 91.28181818181818
  episode_media: {}
  episode_reward_max: 7.790000000000013
  episode_reward_mean: 2.343636363636368
  episode_reward_min: -1.4800000000000004
  episodes_this_iter: 110
  episodes_total: 8440
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.504136856396993
          entropy_coeff: 0.01
          kl: 0.015220129895820035
          policy_loss: -0.0800721113721275
          total_loss: 0.06741134938584943
          vf_explained_var: 0.8850389719009399
          vf_loss: 0.13785147020338565
    num_agent_steps_sampled: 789684
    num_agent_steps_trained: 789684
    num_steps_sampled: 789684
    num_steps_trained: 789684
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,79,11599.3,789684,2.34364,7.79,-1.48,91.2818


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 799680
  custom_metrics: {}
  date: 2021-11-07_17-29-15
  done: false
  episode_len_mean: 89.83928571428571
  episode_media: {}
  episode_reward_max: 9.360000000000005
  episode_reward_mean: 1.7299107142857177
  episode_reward_min: -1.7900000000000007
  episodes_this_iter: 112
  episodes_total: 8552
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5105037446714875
          entropy_coeff: 0.01
          kl: 0.01329976110942247
          policy_loss: -0.08170155062953122
          total_loss: 0.04690433479049522
          vf_explained_var: 0.8827595114707947
          vf_loss: 0.12341240307188824
    num_agent_steps_sampled: 799680
    num_agent_steps_trained: 799680
    num_steps_sampled: 799680
    num_steps_trained: 799680
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,80,11743.8,799680,1.72991,9.36,-1.79,89.8393




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 809676
  custom_metrics: {}
  date: 2021-11-07_17-31-47
  done: false
  episode_len_mean: 90.58181818181818
  episode_media: {}
  episode_reward_max: 8.810000000000015
  episode_reward_mean: 1.8276363636363677
  episode_reward_min: -1.8200000000000007
  episodes_this_iter: 110
  episodes_total: 8662
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5048060117623745
          entropy_coeff: 0.01
          kl: 0.01499426429046237
          policy_loss: -0.07754019424319267
          total_loss: 0.07540600954149014
          vf_explained_var: 0.8679239153862
          vf_loss: 0.14383545508369422
    num_agent_steps_sampled: 809676
    num_agent_steps_trained: 809676
    num_steps_sampled: 809676
    num_steps_trained: 809676
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,81,11895.3,809676,1.82764,8.81,-1.82,90.5818




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 819672
  custom_metrics: {}
  date: 2021-11-07_17-34-31
  done: false
  episode_len_mean: 89.90090090090091
  episode_media: {}
  episode_reward_max: 8.550000000000013
  episode_reward_mean: 2.197927927927933
  episode_reward_min: -1.4500000000000008
  episodes_this_iter: 111
  episodes_total: 8773
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5131867832607693
          entropy_coeff: 0.01
          kl: 0.014758836643891283
          policy_loss: -0.07846143445015973
          total_loss: 0.0798892341227804
          vf_explained_var: 0.8874314427375793
          vf_loss: 0.1498600603105166
    num_agent_steps_sampled: 819672
    num_agent_steps_trained: 819672
    num_steps_sampled: 819672
    num_steps_trained: 819672
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,82,12059,819672,2.19793,8.55,-1.45,89.9009




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 829668
  custom_metrics: {}
  date: 2021-11-07_17-37-10
  done: false
  episode_len_mean: 89.85454545454546
  episode_media: {}
  episode_reward_max: 10.910000000000007
  episode_reward_mean: 2.0070000000000037
  episode_reward_min: -1.810000000000001
  episodes_this_iter: 110
  episodes_total: 8883
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.504748384769146
          entropy_coeff: 0.01
          kl: 0.013905590404950147
          policy_loss: -0.07973765624830356
          total_loss: 0.06524901321778695
          vf_explained_var: 0.8865697383880615
          vf_loss: 0.13835547973489404
    num_agent_steps_sampled: 829668
    num_agent_steps_trained: 829668
    num_steps_sampled: 829668
    num_steps_trained: 829668
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,83,12218.3,829668,2.007,10.91,-1.81,89.8545




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 839664
  custom_metrics: {}
  date: 2021-11-07_17-39-58
  done: false
  episode_len_mean: 89.56637168141593
  episode_media: {}
  episode_reward_max: 10.410000000000013
  episode_reward_mean: 2.303805309734518
  episode_reward_min: -1.7100000000000009
  episodes_this_iter: 113
  episodes_total: 8996
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4959365671516482
          entropy_coeff: 0.01
          kl: 0.014149598240842696
          policy_loss: -0.07878890736585753
          total_loss: 0.06795282521261237
          vf_explained_var: 0.8906643390655518
          vf_loss: 0.1394665445248859
    num_agent_steps_sampled: 839664
    num_agent_steps_trained: 839664
    num_steps_sampled: 839664
    num_steps_trained: 839664
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,84,12386.2,839664,2.30381,10.41,-1.71,89.5664




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 849660
  custom_metrics: {}
  date: 2021-11-07_17-42-34
  done: false
  episode_len_mean: 90.56363636363636
  episode_media: {}
  episode_reward_max: 8.990000000000013
  episode_reward_mean: 2.132000000000004
  episode_reward_min: -1.6700000000000006
  episodes_this_iter: 110
  episodes_total: 9106
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.523839113243625
          entropy_coeff: 0.01
          kl: 0.014520280318996713
          policy_loss: -0.08398791734829672
          total_loss: 0.06642509409759799
          vf_explained_var: 0.8882313966751099
          vf_loss: 0.14257238845253348
    num_agent_steps_sampled: 849660
    num_agent_steps_trained: 849660
    num_steps_sampled: 849660
    num_steps_trained: 849660
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,85,12542.4,849660,2.132,8.99,-1.67,90.5636




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 859656
  custom_metrics: {}
  date: 2021-11-07_17-45-04
  done: false
  episode_len_mean: 90.07207207207207
  episode_media: {}
  episode_reward_max: 9.14000000000001
  episode_reward_mean: 2.270540540540545
  episode_reward_min: -1.7000000000000008
  episodes_this_iter: 111
  episodes_total: 9217
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.500977656372592
          entropy_coeff: 0.01
          kl: 0.014799948489868318
          policy_loss: -0.08000117526588658
          total_loss: 0.06659263322591527
          vf_explained_var: 0.9002763032913208
          vf_loss: 0.13788745069048472
    num_agent_steps_sampled: 859656
    num_agent_steps_trained: 859656
    num_steps_sampled: 859656
    num_steps_trained: 859656
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,86,12692.7,859656,2.27054,9.14,-1.7,90.0721




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 869652
  custom_metrics: {}
  date: 2021-11-07_17-47-54
  done: false
  episode_len_mean: 90.18181818181819
  episode_media: {}
  episode_reward_max: 8.540000000000017
  episode_reward_mean: 2.156909090909096
  episode_reward_min: -1.6400000000000006
  episodes_this_iter: 110
  episodes_total: 9327
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4884441029312265
          entropy_coeff: 0.01
          kl: 0.014277404978349428
          policy_loss: -0.08052562035970454
          total_loss: 0.06143060956054773
          vf_explained_var: 0.8992297053337097
          vf_loss: 0.13431495724914547
    num_agent_steps_sampled: 869652
    num_agent_steps_trained: 869652
    num_steps_sampled: 869652
    num_steps_trained: 869652
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,87,12862,869652,2.15691,8.54,-1.64,90.1818




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 879648
  custom_metrics: {}
  date: 2021-11-07_17-50-26
  done: false
  episode_len_mean: 89.6875
  episode_media: {}
  episode_reward_max: 9.020000000000014
  episode_reward_mean: 1.9443750000000044
  episode_reward_min: -2.1300000000000003
  episodes_this_iter: 112
  episodes_total: 9439
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5000275367345566
          entropy_coeff: 0.01
          kl: 0.014850368898322178
          policy_loss: -0.0764203554767574
          total_loss: 0.08190720317423598
          vf_explained_var: 0.8865973949432373
          vf_loss: 0.14949683621366563
    num_agent_steps_sampled: 879648
    num_agent_steps_trained: 879648
    num_steps_sampled: 879648
    num_steps_trained: 879648
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,88,13014.1,879648,1.94438,9.02,-2.13,89.6875




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 889644
  custom_metrics: {}
  date: 2021-11-07_17-53-11
  done: false
  episode_len_mean: 90.05405405405405
  episode_media: {}
  episode_reward_max: 9.590000000000003
  episode_reward_mean: 2.213243243243248
  episode_reward_min: -1.6300000000000008
  episodes_this_iter: 111
  episodes_total: 9550
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.501292507872622
          entropy_coeff: 0.01
          kl: 0.014363757640839511
          policy_loss: -0.08175013891110817
          total_loss: 0.06602619588852693
          vf_explained_var: 0.8932087421417236
          vf_loss: 0.1400668226612302
    num_agent_steps_sampled: 889644
    num_agent_steps_trained: 889644
    num_steps_sampled: 889644
    num_steps_trained: 889644
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,89,13179.2,889644,2.21324,9.59,-1.63,90.0541


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 899640
  custom_metrics: {}
  date: 2021-11-07_17-55-29
  done: false
  episode_len_mean: 90.90990990990991
  episode_media: {}
  episode_reward_max: 8.720000000000015
  episode_reward_mean: 2.2272972972973024
  episode_reward_min: -1.950000000000001
  episodes_this_iter: 111
  episodes_total: 9661
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5081558902039487
          entropy_coeff: 0.01
          kl: 0.014677665447928128
          policy_loss: -0.08146235699263903
          total_loss: 0.07540261219613827
          vf_explained_var: 0.8902168273925781
          vf_loss: 0.14850896969119198
    num_agent_steps_sampled: 899640
    num_agent_steps_trained: 899640
    num_steps_sampled: 899640
    num_steps_trained: 899640
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,90,13317.5,899640,2.2273,8.72,-1.95,90.9099




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 909636
  custom_metrics: {}
  date: 2021-11-07_17-58-03
  done: false
  episode_len_mean: 89.54954954954955
  episode_media: {}
  episode_reward_max: 8.550000000000011
  episode_reward_mean: 2.2577477477477523
  episode_reward_min: -1.6700000000000008
  episodes_this_iter: 111
  episodes_total: 9772
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.495304168595208
          entropy_coeff: 0.01
          kl: 0.01392611511662182
          policy_loss: -0.08041942024396526
          total_loss: 0.05561904059006618
          vf_explained_var: 0.8987534046173096
          vf_loss: 0.12926606999025642
    num_agent_steps_sampled: 909636
    num_agent_steps_trained: 909636
    num_steps_sampled: 909636
    num_steps_trained: 909636
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,91,13471.6,909636,2.25775,8.55,-1.67,89.5495




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 919632
  custom_metrics: {}
  date: 2021-11-07_18-00-33
  done: false
  episode_len_mean: 91.8348623853211
  episode_media: {}
  episode_reward_max: 8.930000000000012
  episode_reward_mean: 2.1586238532110147
  episode_reward_min: -2.000000000000001
  episodes_this_iter: 109
  episodes_total: 9881
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5080160894964494
          entropy_coeff: 0.01
          kl: 0.013502732182232326
          policy_loss: -0.08341670059718383
          total_loss: 0.039538293889859036
          vf_explained_var: 0.9069659113883972
          vf_loss: 0.1172742426666057
    num_agent_steps_sampled: 919632
    num_agent_steps_trained: 919632
    num_steps_sampled: 919632
    num_steps_trained: 919632
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,92,13620.7,919632,2.15862,8.93,-2,91.8349


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 929628
  custom_metrics: {}
  date: 2021-11-07_18-02-50
  done: false
  episode_len_mean: 92.02777777777777
  episode_media: {}
  episode_reward_max: 12.550000000000015
  episode_reward_mean: 2.399537037037042
  episode_reward_min: -1.900000000000001
  episodes_this_iter: 108
  episodes_total: 9989
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.507759531135233
          entropy_coeff: 0.01
          kl: 0.014646049938045675
          policy_loss: -0.07890711293324955
          total_loss: 0.0765524676690499
          vf_explained_var: 0.8965805768966675
          vf_loss: 0.1471716428016368
    num_agent_steps_sampled: 929628
    num_agent_steps_trained: 929628
    num_steps_sampled: 929628
    num_steps_trained: 929628
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,93,13758.3,929628,2.39954,12.55,-1.9,92.0278




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 939624
  custom_metrics: {}
  date: 2021-11-07_18-05-24
  done: false
  episode_len_mean: 90.45454545454545
  episode_media: {}
  episode_reward_max: 8.780000000000014
  episode_reward_mean: 2.377818181818188
  episode_reward_min: -2.1300000000000003
  episodes_this_iter: 110
  episodes_total: 10099
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.501691791338798
          entropy_coeff: 0.01
          kl: 0.014712388239307548
          policy_loss: -0.07973969169120249
          total_loss: 0.07237978885825883
          vf_explained_var: 0.9025144577026367
          vf_loss: 0.1436197388304286
    num_agent_steps_sampled: 939624
    num_agent_steps_trained: 939624
    num_steps_sampled: 939624
    num_steps_trained: 939624
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,94,13911.8,939624,2.37782,8.78,-2.13,90.4545




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 949620
  custom_metrics: {}
  date: 2021-11-07_18-07-57
  done: false
  episode_len_mean: 90.91891891891892
  episode_media: {}
  episode_reward_max: 9.180000000000007
  episode_reward_mean: 2.39504504504505
  episode_reward_min: -1.6300000000000008
  episodes_this_iter: 111
  episodes_total: 10210
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.506010322693067
          entropy_coeff: 0.01
          kl: 0.013799959852276538
          policy_loss: -0.07800931309819477
          total_loss: 0.07406979849577969
          vf_explained_var: 0.9014742374420166
          vf_loss: 0.14570118114829828
    num_agent_steps_sampled: 949620
    num_agent_steps_trained: 949620
    num_steps_sampled: 949620
    num_steps_trained: 949620
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,95,14064.7,949620,2.39505,9.18,-1.63,90.9189




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 959616
  custom_metrics: {}
  date: 2021-11-07_18-10-27
  done: false
  episode_len_mean: 91.75
  episode_media: {}
  episode_reward_max: 10.560000000000018
  episode_reward_mean: 2.282129629629635
  episode_reward_min: -2.100000000000001
  episodes_this_iter: 108
  episodes_total: 10318
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5045605761373144
          entropy_coeff: 0.01
          kl: 0.013675275727268666
          policy_loss: -0.08239337192036403
          total_loss: 0.0562633636717995
          vf_explained_var: 0.8955557942390442
          vf_loss: 0.1325483526238519
    num_agent_steps_sampled: 959616
    num_agent_steps_trained: 959616
    num_steps_sampled: 959616
    num_steps_trained: 959616
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,96,14214.7,959616,2.28213,10.56,-2.1,91.75




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 969612
  custom_metrics: {}
  date: 2021-11-07_18-13-08
  done: false
  episode_len_mean: 91.8
  episode_media: {}
  episode_reward_max: 8.840000000000016
  episode_reward_mean: 2.0868181818181863
  episode_reward_min: -2.0900000000000007
  episodes_this_iter: 110
  episodes_total: 10428
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5110848451272036
          entropy_coeff: 0.01
          kl: 0.013052004543741245
          policy_loss: -0.08239030974606673
          total_loss: 0.040776212995824145
          vf_explained_var: 0.8906089067459106
          vf_loss: 0.11854327104858353
    num_agent_steps_sampled: 969612
    num_agent_steps_trained: 969612
    num_steps_sampled: 969612
    num_steps_trained: 969612
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,97,14376,969612,2.08682,8.84,-2.09,91.8




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 979608
  custom_metrics: {}
  date: 2021-11-07_18-15-40
  done: false
  episode_len_mean: 90.97272727272727
  episode_media: {}
  episode_reward_max: 6.75000000000001
  episode_reward_mean: 1.9983636363636403
  episode_reward_min: -1.630000000000001
  episodes_this_iter: 110
  episodes_total: 10538
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5119347706819193
          entropy_coeff: 0.01
          kl: 0.01456686091716934
          policy_loss: -0.07986349296779968
          total_loss: 0.05469885888453732
          vf_explained_var: 0.9003019332885742
          vf_loss: 0.1264965690736078
    num_agent_steps_sampled: 979608
    num_agent_steps_trained: 979608
    num_steps_sampled: 979608
    num_steps_trained: 979608
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,98,14527.5,979608,1.99836,6.75,-1.63,90.9727




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 989604
  custom_metrics: {}
  date: 2021-11-07_18-18-08
  done: false
  episode_len_mean: 91.21818181818182
  episode_media: {}
  episode_reward_max: 6.990000000000016
  episode_reward_mean: 1.817454545454549
  episode_reward_min: -1.7900000000000007
  episodes_this_iter: 110
  episodes_total: 10648
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.513296970139202
          entropy_coeff: 0.01
          kl: 0.013885175295228964
          policy_loss: -0.08093450218757503
          total_loss: 0.05467126411346034
          vf_explained_var: 0.8807742595672607
          vf_loss: 0.12910656973831036
    num_agent_steps_sampled: 989604
    num_agent_steps_trained: 989604
    num_steps_sampled: 989604
    num_steps_trained: 989604
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,99,14676.3,989604,1.81745,6.99,-1.79,91.2182




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 999600
  custom_metrics: {}
  date: 2021-11-07_18-20-36
  done: false
  episode_len_mean: 93.18691588785046
  episode_media: {}
  episode_reward_max: 7.330000000000009
  episode_reward_mean: 1.9801869158878556
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 107
  episodes_total: 10755
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5287940551073125
          entropy_coeff: 0.01
          kl: 0.014124087697128574
          policy_loss: -0.08245388903678992
          total_loss: 0.05278005235406578
          vf_explained_var: 0.898181676864624
          vf_loss: 0.12834544429778416
    num_agent_steps_sampled: 999600
    num_agent_steps_trained: 999600
    num_steps_sampled: 999600
    num_steps_trained: 999600


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,100,14824,999600,1.98019,7.33,-2.05,93.1869




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1009596
  custom_metrics: {}
  date: 2021-11-07_18-23-05
  done: false
  episode_len_mean: 92.45370370370371
  episode_media: {}
  episode_reward_max: 8.97000000000001
  episode_reward_mean: 2.2103703703703754
  episode_reward_min: -1.9200000000000008
  episodes_this_iter: 108
  episodes_total: 10863
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.525417670225486
          entropy_coeff: 0.01
          kl: 0.013716308105574107
          policy_loss: -0.08257725580125792
          total_loss: 0.04634810985089877
          vf_explained_var: 0.8804720640182495
          vf_loss: 0.12293207651147475
    num_agent_steps_sampled: 1009596
    num_agent_steps_trained: 1009596
    num_steps_sampled: 1009596
    num_steps_trained: 1009

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,101,14972.8,1009596,2.21037,8.97,-1.92,92.4537


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1019592
  custom_metrics: {}
  date: 2021-11-07_18-25-22
  done: false
  episode_len_mean: 92.69444444444444
  episode_media: {}
  episode_reward_max: 10.710000000000013
  episode_reward_mean: 2.540277777777783
  episode_reward_min: -1.6300000000000006
  episodes_this_iter: 108
  episodes_total: 10971
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.522090495753492
          entropy_coeff: 0.01
          kl: 0.01466584574940887
          policy_loss: -0.07512675248181973
          total_loss: 0.07971352907136464
          vf_explained_var: 0.8866292238235474
          vf_loss: 0.14665055528012477
    num_agent_steps_sampled: 1019592
    num_agent_steps_trained: 1019592
    num_steps_sampled: 1019592
    num_steps_trained: 1019

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,102,15109.4,1019592,2.54028,10.71,-1.63,92.6944




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1029588
  custom_metrics: {}
  date: 2021-11-07_18-28-03
  done: false
  episode_len_mean: 90.73636363636363
  episode_media: {}
  episode_reward_max: 8.77000000000001
  episode_reward_mean: 2.206818181818187
  episode_reward_min: -1.7400000000000009
  episodes_this_iter: 110
  episodes_total: 11081
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.520952227176764
          entropy_coeff: 0.01
          kl: 0.013418924079286059
          policy_loss: -0.07970226787699339
          total_loss: 0.04706716005188914
          vf_explained_var: 0.9048892855644226
          vf_loss: 0.12140896362531134
    num_agent_steps_sampled: 1029588
    num_agent_steps_trained: 1029588
    num_steps_sampled: 1029588
    num_steps_trained: 10295

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,103,15270.4,1029588,2.20682,8.77,-1.74,90.7364




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1039584
  custom_metrics: {}
  date: 2021-11-07_18-30-30
  done: false
  episode_len_mean: 91.19266055045871
  episode_media: {}
  episode_reward_max: 11.21000000000001
  episode_reward_mean: 2.3452293577981704
  episode_reward_min: -1.8100000000000007
  episodes_this_iter: 109
  episodes_total: 11190
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5186590946637666
          entropy_coeff: 0.01
          kl: 0.014342167081517138
          policy_loss: -0.08033990991325715
          total_loss: 0.05858766633189387
          vf_explained_var: 0.9047171473503113
          vf_loss: 0.13144091779254696
    num_agent_steps_sampled: 1039584
    num_agent_steps_trained: 1039584
    num_steps_sampled: 1039584
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,104,15417.9,1039584,2.34523,11.21,-1.81,91.1927


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1049580
  custom_metrics: {}
  date: 2021-11-07_18-32-45
  done: false
  episode_len_mean: 92.71296296296296
  episode_media: {}
  episode_reward_max: 8.760000000000014
  episode_reward_mean: 2.1763888888888934
  episode_reward_min: -1.9100000000000008
  episodes_this_iter: 108
  episodes_total: 11298
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.52011144242735
          entropy_coeff: 0.01
          kl: 0.014385978395988566
          policy_loss: -0.07975049068928401
          total_loss: 0.08540924940831386
          vf_explained_var: 0.8695869445800781
          vf_loss: 0.15758779623633268
    num_agent_steps_sampled: 1049580
    num_agent_steps_trained: 1049580
    num_steps_sampled: 1049580
    num_steps_trained: 1049

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,105,15552.4,1049580,2.17639,8.76,-1.91,92.713




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1059576
  custom_metrics: {}
  date: 2021-11-07_18-35-28
  done: false
  episode_len_mean: 91.8256880733945
  episode_media: {}
  episode_reward_max: 6.9500000000000135
  episode_reward_mean: 1.5677064220183532
  episode_reward_min: -2.3200000000000003
  episodes_this_iter: 109
  episodes_total: 11407
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5314805081766893
          entropy_coeff: 0.01
          kl: 0.01343269074929759
          policy_loss: -0.08242232748426688
          total_loss: 0.03385962285579015
          vf_explained_var: 0.8982226848602295
          vf_loss: 0.11099540582324705
    num_agent_steps_sampled: 1059576
    num_agent_steps_trained: 1059576
    num_steps_sampled: 1059576
    num_steps_trained: 105

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,106,15715.1,1059576,1.56771,6.95,-2.32,91.8257




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1069572
  custom_metrics: {}
  date: 2021-11-07_18-38-08
  done: false
  episode_len_mean: 92.1574074074074
  episode_media: {}
  episode_reward_max: 10.700000000000012
  episode_reward_mean: 2.5025925925925976
  episode_reward_min: -1.7100000000000009
  episodes_this_iter: 108
  episodes_total: 11515
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5148282517734755
          entropy_coeff: 0.01
          kl: 0.014358388713944063
          policy_loss: -0.07870137463681973
          total_loss: 0.06394496190592519
          vf_explained_var: 0.8966584205627441
          vf_loss: 0.13508441373069063
    num_agent_steps_sampled: 1069572
    num_agent_steps_trained: 1069572
    num_steps_sampled: 1069572
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,107,15875.6,1069572,2.50259,10.7,-1.71,92.1574


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1079568
  custom_metrics: {}
  date: 2021-11-07_18-40-26
  done: false
  episode_len_mean: 92.45370370370371
  episode_media: {}
  episode_reward_max: 8.900000000000013
  episode_reward_mean: 2.4626851851851903
  episode_reward_min: -1.860000000000001
  episodes_this_iter: 108
  episodes_total: 11623
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5166496299270893
          entropy_coeff: 0.01
          kl: 0.014325235846354402
          policy_loss: -0.07841948995159732
          total_loss: 0.057436002970028384
          vf_explained_var: 0.9113554358482361
          vf_loss: 0.12838731078653892
    num_agent_steps_sampled: 1079568
    num_agent_steps_trained: 1079568
    num_steps_sampled: 1079568
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,108,16013.5,1079568,2.46269,8.9,-1.86,92.4537




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1089564
  custom_metrics: {}
  date: 2021-11-07_18-43-11
  done: false
  episode_len_mean: 91.32727272727273
  episode_media: {}
  episode_reward_max: 11.020000000000012
  episode_reward_mean: 2.3139090909090965
  episode_reward_min: -1.5200000000000005
  episodes_this_iter: 110
  episodes_total: 11733
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.523335959157373
          entropy_coeff: 0.01
          kl: 0.014636058855517685
          policy_loss: -0.08014618202757377
          total_loss: 0.06281705499650576
          vf_explained_var: 0.8958575129508972
          vf_loss: 0.13485382453371317
    num_agent_steps_sampled: 1089564
    num_agent_steps_trained: 1089564
    num_steps_sampled: 1089564
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,109,16178.3,1089564,2.31391,11.02,-1.52,91.3273




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1099560
  custom_metrics: {}
  date: 2021-11-07_18-45-42
  done: false
  episode_len_mean: 90.46846846846847
  episode_media: {}
  episode_reward_max: 9.130000000000006
  episode_reward_mean: 2.414144144144149
  episode_reward_min: -1.3000000000000005
  episodes_this_iter: 111
  episodes_total: 11844
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.50868232820788
          entropy_coeff: 0.01
          kl: 0.014357509750954156
          policy_loss: -0.07690263481126126
          total_loss: 0.07283436700255952
          vf_explained_var: 0.9075911045074463
          vf_loss: 0.142115621553718
    num_agent_steps_sampled: 1099560
    num_agent_steps_trained: 1099560
    num_steps_sampled: 1099560
    num_steps_trained: 1099560

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,110,16329.1,1099560,2.41414,9.13,-1.3,90.4685


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1109556
  custom_metrics: {}
  date: 2021-11-07_18-48-01
  done: false
  episode_len_mean: 91.41284403669725
  episode_media: {}
  episode_reward_max: 11.270000000000007
  episode_reward_mean: 2.181009174311931
  episode_reward_min: -1.8200000000000012
  episodes_this_iter: 109
  episodes_total: 11953
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.512505729789408
          entropy_coeff: 0.01
          kl: 0.013891259441643959
          policy_loss: -0.07893491332204296
          total_loss: 0.05453095465946274
          vf_explained_var: 0.9038194417953491
          vf_loss: 0.12694489868501058
    num_agent_steps_sampled: 1109556
    num_agent_steps_trained: 1109556
    num_steps_sampled: 1109556
    num_steps_trained: 110

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,111,16468.6,1109556,2.18101,11.27,-1.82,91.4128




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1119552
  custom_metrics: {}
  date: 2021-11-07_18-50-41
  done: false
  episode_len_mean: 91.34545454545454
  episode_media: {}
  episode_reward_max: 8.720000000000011
  episode_reward_mean: 2.1945454545454597
  episode_reward_min: -1.790000000000001
  episodes_this_iter: 110
  episodes_total: 12063
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.525234479781909
          entropy_coeff: 0.01
          kl: 0.01397112770146404
          policy_loss: -0.07925234970947106
          total_loss: 0.06911996066825003
          vf_explained_var: 0.8871068954467773
          vf_loss: 0.14179667936972318
    num_agent_steps_sampled: 1119552
    num_agent_steps_trained: 1119552
    num_steps_sampled: 1119552
    num_steps_trained: 11195

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,112,16627.9,1119552,2.19455,8.72,-1.79,91.3455




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1129548
  custom_metrics: {}
  date: 2021-11-07_18-53-11
  done: false
  episode_len_mean: 92.35514018691589
  episode_media: {}
  episode_reward_max: 12.450000000000017
  episode_reward_mean: 2.4461682242990714
  episode_reward_min: -1.5900000000000007
  episodes_this_iter: 107
  episodes_total: 12170
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.523615714423677
          entropy_coeff: 0.01
          kl: 0.014986794695587925
          policy_loss: -0.07860761327016302
          total_loss: 0.07395939325802346
          vf_explained_var: 0.8966024518013
          vf_loss: 0.14366137204835047
    num_agent_steps_sampled: 1129548
    num_agent_steps_trained: 1129548
    num_steps_sampled: 1129548
    num_steps_trained: 11295

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,113,16778.1,1129548,2.44617,12.45,-1.59,92.3551




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1139544
  custom_metrics: {}
  date: 2021-11-07_18-55-41
  done: false
  episode_len_mean: 91.89908256880734
  episode_media: {}
  episode_reward_max: 8.210000000000012
  episode_reward_mean: 2.2211926605504644
  episode_reward_min: -1.830000000000001
  episodes_this_iter: 109
  episodes_total: 12279
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5060009811678503
          entropy_coeff: 0.01
          kl: 0.013123893039810436
          policy_loss: -0.08086327488701313
          total_loss: 0.03465338381819236
          vf_explained_var: 0.9099036455154419
          vf_loss: 0.11067879903090433
    num_agent_steps_sampled: 1139544
    num_agent_steps_trained: 1139544
    num_steps_sampled: 1139544
    num_steps_trained: 113

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,114,16928.4,1139544,2.22119,8.21,-1.83,91.8991




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1149540
  custom_metrics: {}
  date: 2021-11-07_18-58-09
  done: false
  episode_len_mean: 92.57407407407408
  episode_media: {}
  episode_reward_max: 10.210000000000017
  episode_reward_mean: 2.1409259259259312
  episode_reward_min: -1.9500000000000008
  episodes_this_iter: 108
  episodes_total: 12387
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5070309029685127
          entropy_coeff: 0.01
          kl: 0.01402945550235612
          policy_loss: -0.07935358458556808
          total_loss: 0.04489233230567004
          vf_explained_var: 0.9064720273017883
          vf_loss: 0.1173553717473888
    num_agent_steps_sampled: 1149540
    num_agent_steps_trained: 1149540
    num_steps_sampled: 1149540
    num_steps_trained: 114

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,115,17076.2,1149540,2.14093,10.21,-1.95,92.5741




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1159536
  custom_metrics: {}
  date: 2021-11-07_19-00-51
  done: false
  episode_len_mean: 92.08333333333333
  episode_media: {}
  episode_reward_max: 9.530000000000005
  episode_reward_mean: 2.060370370370375
  episode_reward_min: -1.8000000000000007
  episodes_this_iter: 108
  episodes_total: 12495
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5087737841483873
          entropy_coeff: 0.01
          kl: 0.012695746109073086
          policy_loss: -0.08611554581130672
          total_loss: 0.022979749304552873
          vf_explained_var: 0.908194899559021
          vf_loss: 0.10526053556997297
    num_agent_steps_sampled: 1159536
    num_agent_steps_trained: 1159536
    num_steps_sampled: 1159536
    num_steps_trained: 115

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,116,17237.8,1159536,2.06037,9.53,-1.8,92.0833




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1169532
  custom_metrics: {}
  date: 2021-11-07_19-03-40
  done: false
  episode_len_mean: 91.8256880733945
  episode_media: {}
  episode_reward_max: 10.680000000000016
  episode_reward_mean: 2.538073394495418
  episode_reward_min: -1.7300000000000009
  episodes_this_iter: 109
  episodes_total: 12604
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5097839773210704
          entropy_coeff: 0.01
          kl: 0.015231431065808158
          policy_loss: -0.07644215349911943
          total_loss: 0.08671518789254065
          vf_explained_var: 0.8950908184051514
          vf_loss: 0.15355607533715984
    num_agent_steps_sampled: 1169532
    num_agent_steps_trained: 1169532
    num_steps_sampled: 1169532
    num_steps_trained: 116

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,117,17406.7,1169532,2.53807,10.68,-1.73,91.8257




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1179528
  custom_metrics: {}
  date: 2021-11-07_19-06-23
  done: false
  episode_len_mean: 90.82727272727273
  episode_media: {}
  episode_reward_max: 9.00000000000001
  episode_reward_mean: 2.0773636363636414
  episode_reward_min: -1.7000000000000004
  episodes_this_iter: 110
  episodes_total: 12714
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5010064461292365
          entropy_coeff: 0.01
          kl: 0.012926075054556696
          policy_loss: -0.08155576274053664
          total_loss: 0.041042282741166586
          vf_explained_var: 0.898971676826477
          vf_loss: 0.11816089402040482
    num_agent_steps_sampled: 1179528
    num_agent_steps_trained: 1179528
    num_steps_sampled: 1179528
    num_steps_trained: 117

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,118,17569.8,1179528,2.07736,9,-1.7,90.8273




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1189524
  custom_metrics: {}
  date: 2021-11-07_19-09-09
  done: false
  episode_len_mean: 90.22522522522523
  episode_media: {}
  episode_reward_max: 9.88
  episode_reward_mean: 1.8671171171171212
  episode_reward_min: -2.0699999999999994
  episodes_this_iter: 111
  episodes_total: 12825
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.501251906615037
          entropy_coeff: 0.01
          kl: 0.012929444165230623
          policy_loss: -0.08346219852280158
          total_loss: 0.023793616576486418
          vf_explained_var: 0.8934633135795593
          vf_loss: 0.10281344397455199
    num_agent_steps_sampled: 1189524
    num_agent_steps_trained: 1189524
    num_steps_sampled: 1189524
    num_steps_trained: 1189524
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,119,17736.2,1189524,1.86712,9.88,-2.07,90.2252




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1199520
  custom_metrics: {}
  date: 2021-11-07_19-11-50
  done: false
  episode_len_mean: 89.74107142857143
  episode_media: {}
  episode_reward_max: 8.510000000000016
  episode_reward_mean: 2.4444642857142918
  episode_reward_min: -2.0699999999999994
  episodes_this_iter: 112
  episodes_total: 12937
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4763449143140743
          entropy_coeff: 0.01
          kl: 0.013310621567230181
          policy_loss: -0.08069694240091957
          total_loss: 0.04991413564461036
          vf_explained_var: 0.922523558139801
          vf_loss: 0.1250512657352747
    num_agent_steps_sampled: 1199520
    num_agent_steps_trained: 1199520
    num_steps_sampled: 1199520
    num_steps_trained: 1199

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,120,17897.2,1199520,2.44446,8.51,-2.07,89.7411




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1209516
  custom_metrics: {}
  date: 2021-11-07_19-14-23
  done: false
  episode_len_mean: 92.42592592592592
  episode_media: {}
  episode_reward_max: 10.600000000000016
  episode_reward_mean: 2.3746296296296348
  episode_reward_min: -1.800000000000001
  episodes_this_iter: 108
  episodes_total: 13045
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5034104114923723
          entropy_coeff: 0.01
          kl: 0.014258952483697403
          policy_loss: -0.07806427923437112
          total_loss: 0.0604373816257486
          vf_explained_var: 0.8886040449142456
          vf_loss: 0.1310520863558492
    num_agent_steps_sampled: 1209516
    num_agent_steps_trained: 1209516
    num_steps_sampled: 1209516
    num_steps_trained: 1209

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,121,18050.3,1209516,2.37463,10.6,-1.8,92.4259


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1219512
  custom_metrics: {}
  date: 2021-11-07_19-16-40
  done: false
  episode_len_mean: 92.41284403669725
  episode_media: {}
  episode_reward_max: 10.890000000000011
  episode_reward_mean: 1.9433027522935822
  episode_reward_min: -2.4599999999999995
  episodes_this_iter: 109
  episodes_total: 13154
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.518586225183601
          entropy_coeff: 0.01
          kl: 0.01404099937477519
          policy_loss: -0.08146773364761065
          total_loss: 0.05522946181635444
          vf_explained_var: 0.8927851319313049
          vf_loss: 0.12989590593542044
    num_agent_steps_sampled: 1219512
    num_agent_steps_trained: 1219512
    num_steps_sampled: 1219512
    num_steps_trained: 121

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,122,18186.7,1219512,1.9433,10.89,-2.46,92.4128




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1229508
  custom_metrics: {}
  date: 2021-11-07_19-19-09
  done: false
  episode_len_mean: 93.13207547169812
  episode_media: {}
  episode_reward_max: 9.060000000000013
  episode_reward_mean: 2.0558490566037784
  episode_reward_min: -1.760000000000001
  episodes_this_iter: 106
  episodes_total: 13260
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5046601751930693
          entropy_coeff: 0.01
          kl: 0.01345825673824362
          policy_loss: -0.08294781334857401
          total_loss: 0.03578591674216028
          vf_explained_var: 0.9072006940841675
          vf_loss: 0.11312073982503806
    num_agent_steps_sampled: 1229508
    num_agent_steps_trained: 1229508
    num_steps_sampled: 1229508
    num_steps_trained: 1229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,123,18336,1229508,2.05585,9.06,-1.76,93.1321




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1239504
  custom_metrics: {}
  date: 2021-11-07_19-21-38
  done: false
  episode_len_mean: 91.46363636363637
  episode_media: {}
  episode_reward_max: 10.440000000000015
  episode_reward_mean: 2.272090909090915
  episode_reward_min: -1.4600000000000006
  episodes_this_iter: 110
  episodes_total: 13370
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.500634270855504
          entropy_coeff: 0.01
          kl: 0.013746317402417295
          policy_loss: -0.0813627652075683
          total_loss: 0.04917512311496668
          vf_explained_var: 0.9140698313713074
          vf_loss: 0.12422840073545519
    num_agent_steps_sampled: 1239504
    num_agent_steps_trained: 1239504
    num_steps_sampled: 1239504
    num_steps_trained: 1239

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,124,18485.1,1239504,2.27209,10.44,-1.46,91.4636


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1249500
  custom_metrics: {}
  date: 2021-11-07_19-23-54
  done: false
  episode_len_mean: 92.16822429906541
  episode_media: {}
  episode_reward_max: 9.290000000000006
  episode_reward_mean: 2.299065420560754
  episode_reward_min: -1.7400000000000009
  episodes_this_iter: 107
  episodes_total: 13477
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.514885278440948
          entropy_coeff: 0.01
          kl: 0.0140508476405776
          policy_loss: -0.08324291240742318
          total_loss: 0.047982333981010136
          vf_explained_var: 0.901093602180481
          vf_loss: 0.12436451087904792
    num_agent_steps_sampled: 1249500
    num_agent_steps_trained: 1249500
    num_steps_sampled: 1249500
    num_steps_trained: 124950

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,125,18620.6,1249500,2.29907,9.29,-1.74,92.1682


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1259496
  custom_metrics: {}
  date: 2021-11-07_19-26-12
  done: false
  episode_len_mean: 94.35514018691589
  episode_media: {}
  episode_reward_max: 10.430000000000017
  episode_reward_mean: 2.399626168224305
  episode_reward_min: -1.800000000000001
  episodes_this_iter: 107
  episodes_total: 13584
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.500425718177078
          entropy_coeff: 0.01
          kl: 0.012758639392001739
          policy_loss: -0.0860444890979964
          total_loss: 0.015013092960047925
          vf_explained_var: 0.9142963290214539
          vf_loss: 0.09699606259918621
    num_agent_steps_sampled: 1259496
    num_agent_steps_trained: 1259496
    num_steps_sampled: 1259496
    num_steps_trained: 1259

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,126,18758.9,1259496,2.39963,10.43,-1.8,94.3551




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1269492
  custom_metrics: {}
  date: 2021-11-07_19-28-43
  done: false
  episode_len_mean: 91.30275229357798
  episode_media: {}
  episode_reward_max: 10.690000000000015
  episode_reward_mean: 2.47642201834863
  episode_reward_min: -1.880000000000001
  episodes_this_iter: 109
  episodes_total: 13693
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4900931015992778
          entropy_coeff: 0.01
          kl: 0.014026295217298927
          policy_loss: -0.08115572390170434
          total_loss: 0.07229705712972925
          vf_explained_var: 0.9071541428565979
          vf_loss: 0.14640005772662723
    num_agent_steps_sampled: 1269492
    num_agent_steps_trained: 1269492
    num_steps_sampled: 1269492
    num_steps_trained: 1269

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,127,18910.1,1269492,2.47642,10.69,-1.88,91.3028




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1279488
  custom_metrics: {}
  date: 2021-11-07_19-31-08
  done: false
  episode_len_mean: 93.33333333333333
  episode_media: {}
  episode_reward_max: 8.690000000000017
  episode_reward_mean: 2.344907407407414
  episode_reward_min: -1.7600000000000007
  episodes_this_iter: 108
  episodes_total: 13801
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.499899204368265
          entropy_coeff: 0.01
          kl: 0.013446652821213066
          policy_loss: -0.08651474219802607
          total_loss: 0.02246232653259594
          vf_explained_var: 0.9131360054016113
          vf_loss: 0.10334290511842467
    num_agent_steps_sampled: 1279488
    num_agent_steps_trained: 1279488
    num_steps_sampled: 1279488
    num_steps_trained: 1279

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,128,19054.9,1279488,2.34491,8.69,-1.76,93.3333




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1289484
  custom_metrics: {}
  date: 2021-11-07_19-33-58
  done: false
  episode_len_mean: 93.29906542056075
  episode_media: {}
  episode_reward_max: 9.000000000000009
  episode_reward_mean: 2.795140186915895
  episode_reward_min: -1.790000000000001
  episodes_this_iter: 107
  episodes_total: 13908
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4843217611312864
          entropy_coeff: 0.01
          kl: 0.014241642459987582
          policy_loss: -0.08193998758513958
          total_loss: 0.049130447644294584
          vf_explained_var: 0.908458948135376
          vf_loss: 0.12346941063451207
    num_agent_steps_sampled: 1289484
    num_agent_steps_trained: 1289484
    num_steps_sampled: 1289484
    num_steps_trained: 1289

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,129,19224.8,1289484,2.79514,9,-1.79,93.2991




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1299480
  custom_metrics: {}
  date: 2021-11-07_19-36-28
  done: false
  episode_len_mean: 94.39047619047619
  episode_media: {}
  episode_reward_max: 9.120000000000013
  episode_reward_mean: 2.46780952380953
  episode_reward_min: -1.8900000000000012
  episodes_this_iter: 105
  episodes_total: 14013
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5040195259273563
          entropy_coeff: 0.01
          kl: 0.014964390928174129
          policy_loss: -0.08056974953884243
          total_loss: 0.05913820854332457
          vf_explained_var: 0.9033365845680237
          vf_loss: 0.13065739871345014
    num_agent_steps_sampled: 1299480
    num_agent_steps_trained: 1299480
    num_steps_sampled: 1299480
    num_steps_trained: 1299

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,130,19374.8,1299480,2.46781,9.12,-1.89,94.3905




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1309476
  custom_metrics: {}
  date: 2021-11-07_19-38-53
  done: false
  episode_len_mean: 93.52777777777777
  episode_media: {}
  episode_reward_max: 11.260000000000009
  episode_reward_mean: 2.5129629629629693
  episode_reward_min: -1.8200000000000007
  episodes_this_iter: 108
  episodes_total: 14121
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4992141876465235
          entropy_coeff: 0.01
          kl: 0.01475456013245197
          policy_loss: -0.07993803051037666
          total_loss: 0.06915044648588722
          vf_explained_var: 0.8963756561279297
          vf_loss: 0.1404678841185175
    num_agent_steps_sampled: 1309476
    num_agent_steps_trained: 1309476
    num_steps_sampled: 1309476
    num_steps_trained: 130

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,131,19520.1,1309476,2.51296,11.26,-1.82,93.5278




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1319472
  custom_metrics: {}
  date: 2021-11-07_19-41-21
  done: false
  episode_len_mean: 95.40384615384616
  episode_media: {}
  episode_reward_max: 8.400000000000016
  episode_reward_mean: 2.1100961538461593
  episode_reward_min: -1.6200000000000008
  episodes_this_iter: 104
  episodes_total: 14225
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.49353743862902
          entropy_coeff: 0.01
          kl: 0.013361837810790573
          policy_loss: -0.08046111927455307
          total_loss: 0.040719077363610266
          vf_explained_var: 0.9043065905570984
          vf_loss: 0.11567563363390727
    num_agent_steps_sampled: 1319472
    num_agent_steps_trained: 1319472
    num_steps_sampled: 1319472
    num_steps_trained: 131

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,132,19667.8,1319472,2.1101,8.4,-1.62,95.4038


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1329468
  custom_metrics: {}
  date: 2021-11-07_19-43-37
  done: false
  episode_len_mean: 93.30841121495327
  episode_media: {}
  episode_reward_max: 8.760000000000018
  episode_reward_mean: 2.404205607476642
  episode_reward_min: -1.950000000000001
  episodes_this_iter: 107
  episodes_total: 14332
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4733550801236404
          entropy_coeff: 0.01
          kl: 0.013591523559847758
          policy_loss: -0.08160322504197685
          total_loss: 0.03580214035434601
          vf_explained_var: 0.9145292639732361
          vf_loss: 0.11117572514738283
    num_agent_steps_sampled: 1329468
    num_agent_steps_trained: 1329468
    num_steps_sampled: 1329468
    num_steps_trained: 1329

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,133,19803.5,1329468,2.40421,8.76,-1.95,93.3084




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1339464
  custom_metrics: {}
  date: 2021-11-07_19-46-18
  done: false
  episode_len_mean: 91.61467889908256
  episode_media: {}
  episode_reward_max: 12.740000000000016
  episode_reward_mean: 2.5471559633027585
  episode_reward_min: -2.3199999999999976
  episodes_this_iter: 109
  episodes_total: 14441
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4840445675401606
          entropy_coeff: 0.01
          kl: 0.014338742352346194
          policy_loss: -0.07858670271344037
          total_loss: 0.07351409788601673
          vf_explained_var: 0.9144460558891296
          vf_loss: 0.1442757984647193
    num_agent_steps_sampled: 1339464
    num_agent_steps_trained: 1339464
    num_steps_sampled: 1339464
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,134,19964.4,1339464,2.54716,12.74,-2.32,91.6147




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1349460
  custom_metrics: {}
  date: 2021-11-07_19-48-47
  done: false
  episode_len_mean: 92.19266055045871
  episode_media: {}
  episode_reward_max: 10.700000000000015
  episode_reward_mean: 2.650183486238538
  episode_reward_min: -1.7000000000000008
  episodes_this_iter: 109
  episodes_total: 14550
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4836563884702505
          entropy_coeff: 0.01
          kl: 0.013231605002724939
          policy_loss: -0.07980369379441453
          total_loss: 0.0397089596463638
          vf_explained_var: 0.9260602593421936
          vf_loss: 0.11420596749800392
    num_agent_steps_sampled: 1349460
    num_agent_steps_trained: 1349460
    num_steps_sampled: 1349460
    num_steps_trained: 134

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,135,20113.1,1349460,2.65018,10.7,-1.7,92.1927


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1359456
  custom_metrics: {}
  date: 2021-11-07_19-51-02
  done: false
  episode_len_mean: 94.47169811320755
  episode_media: {}
  episode_reward_max: 10.450000000000012
  episode_reward_mean: 2.6295283018867983
  episode_reward_min: -2.17
  episodes_this_iter: 106
  episodes_total: 14656
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.487950796754951
          entropy_coeff: 0.01
          kl: 0.013267317901245999
          policy_loss: -0.08512308164778301
          total_loss: 0.028191216457157563
          vf_explained_var: 0.9113751649856567
          vf_loss: 0.10796919675846385
    num_agent_steps_sampled: 1359456
    num_agent_steps_trained: 1359456
    num_steps_sampled: 1359456
    num_steps_trained: 1359456
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,136,20248.4,1359456,2.62953,10.45,-2.17,94.4717




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1369452
  custom_metrics: {}
  date: 2021-11-07_19-53-31
  done: false
  episode_len_mean: 93.39622641509433
  episode_media: {}
  episode_reward_max: 10.740000000000009
  episode_reward_mean: 2.1699056603773634
  episode_reward_min: -2.09
  episodes_this_iter: 106
  episodes_total: 14762
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4930481588738598
          entropy_coeff: 0.01
          kl: 0.013458897937996999
          policy_loss: -0.08117077029509168
          total_loss: 0.03536067003559353
          vf_explained_var: 0.9024460911750793
          vf_loss: 0.11080086907117158
    num_agent_steps_sampled: 1369452
    num_agent_steps_trained: 1369452
    num_steps_sampled: 1369452
    num_steps_trained: 1369452
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,137,20397.9,1369452,2.16991,10.74,-2.09,93.3962




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1379448
  custom_metrics: {}
  date: 2021-11-07_19-56-39
  done: false
  episode_len_mean: 92.85321100917432
  episode_media: {}
  episode_reward_max: 11.120000000000012
  episode_reward_mean: 2.8492660550458786
  episode_reward_min: -1.930000000000001
  episodes_this_iter: 109
  episodes_total: 14871
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4995739828827035
          entropy_coeff: 0.01
          kl: 0.01366194098387174
          policy_loss: -0.08060027715296317
          total_loss: 0.048665314701059434
          vf_explained_var: 0.923119068145752
          vf_loss: 0.12313772107864547
    num_agent_steps_sampled: 1379448
    num_agent_steps_trained: 1379448
    num_steps_sampled: 1379448
    num_steps_trained: 137

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,138,20585.5,1379448,2.84927,11.12,-1.93,92.8532




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1389444
  custom_metrics: {}
  date: 2021-11-07_19-59-22
  done: false
  episode_len_mean: 91.34862385321101
  episode_media: {}
  episode_reward_max: 8.610000000000012
  episode_reward_mean: 2.142844036697253
  episode_reward_min: -1.9800000000000009
  episodes_this_iter: 109
  episodes_total: 14980
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.49713330024328
          entropy_coeff: 0.01
          kl: 0.012816549188123538
          policy_loss: -0.0850025643602523
          total_loss: 0.019741304371601497
          vf_explained_var: 0.92364501953125
          vf_loss: 0.10051750023650308
    num_agent_steps_sampled: 1389444
    num_agent_steps_trained: 1389444
    num_steps_sampled: 1389444
    num_steps_trained: 1389444

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,139,20748.6,1389444,2.14284,8.61,-1.98,91.3486


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1399440
  custom_metrics: {}
  date: 2021-11-07_20-01-37
  done: false
  episode_len_mean: 94.54285714285714
  episode_media: {}
  episode_reward_max: 13.13000000000001
  episode_reward_mean: 2.4348571428571484
  episode_reward_min: -1.820000000000001
  episodes_this_iter: 105
  episodes_total: 15085
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.515169446081178
          entropy_coeff: 0.01
          kl: 0.013762743440845573
          policy_loss: -0.08365266160068349
          total_loss: 0.03725298355277787
          vf_explained_var: 0.9057474136352539
          vf_loss: 0.11470408904581116
    num_agent_steps_sampled: 1399440
    num_agent_steps_trained: 1399440
    num_steps_sampled: 1399440
    num_steps_trained: 1399

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,140,20883.7,1399440,2.43486,13.13,-1.82,94.5429




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1409436
  custom_metrics: {}
  date: 2021-11-07_20-04-03
  done: false
  episode_len_mean: 94.94339622641509
  episode_media: {}
  episode_reward_max: 14.540000000000015
  episode_reward_mean: 2.048962264150948
  episode_reward_min: -2.189999999999997
  episodes_this_iter: 106
  episodes_total: 15191
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.516207298458132
          entropy_coeff: 0.01
          kl: 0.013319692389660992
          policy_loss: -0.08168262255210908
          total_loss: 0.046163785684471714
          vf_explained_var: 0.8952115178108215
          vf_loss: 0.12266455605052985
    num_agent_steps_sampled: 1409436
    num_agent_steps_trained: 1409436
    num_steps_sampled: 1409436
    num_steps_trained: 140

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,141,21029.1,1409436,2.04896,14.54,-2.19,94.9434




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1419432
  custom_metrics: {}
  date: 2021-11-07_20-06-32
  done: false
  episode_len_mean: 93.30841121495327
  episode_media: {}
  episode_reward_max: 10.790000000000015
  episode_reward_mean: 2.4016822429906597
  episode_reward_min: -1.790000000000001
  episodes_this_iter: 107
  episodes_total: 15298
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.499329665583423
          entropy_coeff: 0.01
          kl: 0.013086006725173576
          policy_loss: -0.08364523139535489
          total_loss: 0.040723531485463565
          vf_explained_var: 0.9041531085968018
          vf_loss: 0.11955049862114028
    num_agent_steps_sampled: 1419432
    num_agent_steps_trained: 1419432
    num_steps_sampled: 1419432
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,142,21177.8,1419432,2.40168,10.79,-1.79,93.3084


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1429428
  custom_metrics: {}
  date: 2021-11-07_20-08-49
  done: false
  episode_len_mean: 93.23148148148148
  episode_media: {}
  episode_reward_max: 10.740000000000016
  episode_reward_mean: 2.2029629629629683
  episode_reward_min: -1.730000000000001
  episodes_this_iter: 108
  episodes_total: 15406
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5046103404118463
          entropy_coeff: 0.01
          kl: 0.012885041351038799
          policy_loss: -0.08330719826870367
          total_loss: 0.01919776639765781
          vf_explained_var: 0.9158593416213989
          vf_loss: 0.09819733213362658
    num_agent_steps_sampled: 1429428
    num_agent_steps_trained: 1429428
    num_steps_sampled: 1429428
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,143,21315.2,1429428,2.20296,10.74,-1.73,93.2315




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1439424
  custom_metrics: {}
  date: 2021-11-07_20-11-19
  done: false
  episode_len_mean: 93.11111111111111
  episode_media: {}
  episode_reward_max: 10.290000000000017
  episode_reward_mean: 2.511759259259265
  episode_reward_min: -1.7000000000000006
  episodes_this_iter: 108
  episodes_total: 15514
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5039966037130763
          entropy_coeff: 0.01
          kl: 0.014304469593943388
          policy_loss: -0.08523369189829398
          total_loss: 0.05408409167495039
          vf_explained_var: 0.9086123108863831
          vf_loss: 0.13177037742068498
    num_agent_steps_sampled: 1439424
    num_agent_steps_trained: 1439424
    num_steps_sampled: 1439424
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,144,21464.7,1439424,2.51176,10.29,-1.7,93.1111




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1449420
  custom_metrics: {}
  date: 2021-11-07_20-13-49
  done: false
  episode_len_mean: 92.10185185185185
  episode_media: {}
  episode_reward_max: 10.450000000000017
  episode_reward_mean: 2.2881481481481547
  episode_reward_min: -1.8300000000000007
  episodes_this_iter: 108
  episodes_total: 15622
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5045489062610855
          entropy_coeff: 0.01
          kl: 0.012683153689580541
          policy_loss: -0.08433173545118836
          total_loss: 0.0290465959522905
          vf_explained_var: 0.9192653894424438
          vf_loss: 0.10953000967796796
    num_agent_steps_sampled: 1449420
    num_agent_steps_trained: 1449420
    num_steps_sampled: 1449420
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,145,21614.7,1449420,2.28815,10.45,-1.83,92.1019


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1459416
  custom_metrics: {}
  date: 2021-11-07_20-16-04
  done: false
  episode_len_mean: 93.73831775700934
  episode_media: {}
  episode_reward_max: 12.670000000000016
  episode_reward_mean: 2.7921495327102877
  episode_reward_min: -1.7400000000000007
  episodes_this_iter: 107
  episodes_total: 15729
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.503856944018959
          entropy_coeff: 0.01
          kl: 0.01364988594154093
          policy_loss: -0.08426331508204214
          total_loss: 0.031405988520281945
          vf_explained_var: 0.9329918622970581
          vf_loss: 0.10961172622509109
    num_agent_steps_sampled: 1459416
    num_agent_steps_trained: 1459416
    num_steps_sampled: 1459416
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,146,21749.8,1459416,2.79215,12.67,-1.74,93.7383




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1469412
  custom_metrics: {}
  date: 2021-11-07_20-19-02
  done: false
  episode_len_mean: 91.93518518518519
  episode_media: {}
  episode_reward_max: 8.820000000000013
  episode_reward_mean: 2.475555555555561
  episode_reward_min: -1.8200000000000007
  episodes_this_iter: 108
  episodes_total: 15837
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.493053421811161
          entropy_coeff: 0.01
          kl: 0.013177542322150824
          policy_loss: -0.0852029150686203
          total_loss: 0.018763898856317004
          vf_explained_var: 0.9279175996780396
          vf_loss: 0.09887725947997891
    num_agent_steps_sampled: 1469412
    num_agent_steps_trained: 1469412
    num_steps_sampled: 1469412
    num_steps_trained: 1469

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,147,21928.4,1469412,2.47556,8.82,-1.82,91.9352




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1479408
  custom_metrics: {}
  date: 2021-11-07_20-21-35
  done: false
  episode_len_mean: 91.35779816513761
  episode_media: {}
  episode_reward_max: 9.30000000000001
  episode_reward_mean: 2.2276146788990876
  episode_reward_min: -2.12
  episodes_this_iter: 109
  episodes_total: 15946
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.501949546683548
          entropy_coeff: 0.01
          kl: 0.014320820369194808
          policy_loss: -0.08323524749049774
          total_loss: 0.044371284697300346
          vf_explained_var: 0.9120408296585083
          vf_loss: 0.12000140766334584
    num_agent_steps_sampled: 1479408
    num_agent_steps_trained: 1479408
    num_steps_sampled: 1479408
    num_steps_trained: 1479408
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,148,22080.7,1479408,2.22761,9.3,-2.12,91.3578


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1489404
  custom_metrics: {}
  date: 2021-11-07_20-23-49
  done: false
  episode_len_mean: 93.4392523364486
  episode_media: {}
  episode_reward_max: 9.270000000000012
  episode_reward_mean: 2.272056074766361
  episode_reward_min: -2.0099999999999993
  episodes_this_iter: 107
  episodes_total: 16053
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5102897226301013
          entropy_coeff: 0.01
          kl: 0.013858118290875892
          policy_loss: -0.08079682507066645
          total_loss: 0.05169509374974375
          vf_explained_var: 0.904965877532959
          vf_loss: 0.12602428847159713
    num_agent_steps_sampled: 1489404
    num_agent_steps_trained: 1489404
    num_steps_sampled: 1489404
    num_steps_trained: 14894

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,149,22215.1,1489404,2.27206,9.27,-2.01,93.4393




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1499400
  custom_metrics: {}
  date: 2021-11-07_20-26-18
  done: false
  episode_len_mean: 92.1574074074074
  episode_media: {}
  episode_reward_max: 8.590000000000018
  episode_reward_mean: 2.251111111111117
  episode_reward_min: -1.790000000000001
  episodes_this_iter: 108
  episodes_total: 16161
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.496755850213206
          entropy_coeff: 0.01
          kl: 0.013401686361693923
          policy_loss: -0.08551507297043617
          total_loss: 0.03254095756440845
          vf_explained_var: 0.9145838618278503
          vf_loss: 0.11249287160447775
    num_agent_steps_sampled: 1499400
    num_agent_steps_trained: 1499400
    num_steps_sampled: 1499400
    num_steps_trained: 149940

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,150,22364.2,1499400,2.25111,8.59,-1.79,92.1574




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1509396
  custom_metrics: {}
  date: 2021-11-07_20-29-00
  done: false
  episode_len_mean: 92.22935779816514
  episode_media: {}
  episode_reward_max: 10.850000000000016
  episode_reward_mean: 2.5500000000000056
  episode_reward_min: -1.5900000000000007
  episodes_this_iter: 109
  episodes_total: 16270
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5006105113233255
          entropy_coeff: 0.01
          kl: 0.013747328631685223
          policy_loss: -0.08310312086827734
          total_loss: 0.030398918519544807
          vf_explained_var: 0.9092439413070679
          vf_loss: 0.10719001206816134
    num_agent_steps_sampled: 1509396
    num_agent_steps_trained: 1509396
    num_steps_sampled: 1509396
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,151,22525.9,1509396,2.55,10.85,-1.59,92.2294


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1519392
  custom_metrics: {}
  date: 2021-11-07_20-31-18
  done: false
  episode_len_mean: 93.83962264150944
  episode_media: {}
  episode_reward_max: 12.920000000000014
  episode_reward_mean: 2.5735849056603834
  episode_reward_min: -2.3100000000000005
  episodes_this_iter: 106
  episodes_total: 16376
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5027895134738367
          entropy_coeff: 0.01
          kl: 0.01389927465357827
          policy_loss: -0.08141363270453408
          total_loss: 0.0517153012765269
          vf_explained_var: 0.9060806632041931
          vf_loss: 0.1264925429549737
    num_agent_steps_sampled: 1519392
    num_agent_steps_trained: 1519392
    num_steps_sampled: 1519392
    num_steps_trained: 1519

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,152,22663.7,1519392,2.57358,12.92,-2.31,93.8396




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1529388
  custom_metrics: {}
  date: 2021-11-07_20-33-47
  done: false
  episode_len_mean: 94.20754716981132
  episode_media: {}
  episode_reward_max: 12.780000000000014
  episode_reward_mean: 3.0327358490566105
  episode_reward_min: -1.9400000000000008
  episodes_this_iter: 106
  episodes_total: 16482
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.494545528012463
          entropy_coeff: 0.01
          kl: 0.01491354715952469
          policy_loss: -0.08309647862353704
          total_loss: 0.05615915631413714
          vf_explained_var: 0.9268737435340881
          vf_loss: 0.1302261640644099
    num_agent_steps_sampled: 1529388
    num_agent_steps_trained: 1529388
    num_steps_sampled: 1529388
    num_steps_trained: 1529

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,153,22812.9,1529388,3.03274,12.78,-1.94,94.2075




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1539384
  custom_metrics: {}
  date: 2021-11-07_20-36-17
  done: false
  episode_len_mean: 94.5377358490566
  episode_media: {}
  episode_reward_max: 9.080000000000007
  episode_reward_mean: 2.6899056603773652
  episode_reward_min: -1.6600000000000008
  episodes_this_iter: 106
  episodes_total: 16588
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5071878834667367
          entropy_coeff: 0.01
          kl: 0.0135145278118282
          policy_loss: -0.08404081961633558
          total_loss: 0.03176670105029375
          vf_explained_var: 0.9178188443183899
          vf_loss: 0.1100916147327576
    num_agent_steps_sampled: 1539384
    num_agent_steps_trained: 1539384
    num_steps_sampled: 1539384
    num_steps_trained: 153938

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,154,22963,1539384,2.68991,9.08,-1.66,94.5377




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1549380
  custom_metrics: {}
  date: 2021-11-07_20-38-46
  done: false
  episode_len_mean: 93.59813084112149
  episode_media: {}
  episode_reward_max: 12.680000000000016
  episode_reward_mean: 2.56971962616823
  episode_reward_min: -1.6600000000000006
  episodes_this_iter: 107
  episodes_total: 16695
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5012220521258492
          entropy_coeff: 0.01
          kl: 0.01424880438530368
          policy_loss: -0.08385573502025033
          total_loss: 0.0484775500674533
          vf_explained_var: 0.9093213677406311
          vf_loss: 0.12488494649067776
    num_agent_steps_sampled: 1549380
    num_agent_steps_trained: 1549380
    num_steps_sampled: 1549380
    num_steps_trained: 15493

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,155,23111.6,1549380,2.56972,12.68,-1.66,93.5981




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1559376
  custom_metrics: {}
  date: 2021-11-07_20-41-22
  done: false
  episode_len_mean: 92.77981651376147
  episode_media: {}
  episode_reward_max: 12.520000000000016
  episode_reward_mean: 2.758623853211015
  episode_reward_min: -2.1
  episodes_this_iter: 109
  episodes_total: 16804
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.50600140869108
          entropy_coeff: 0.01
          kl: 0.013402741524729027
          policy_loss: -0.08904644814280109
          total_loss: 0.02347909927400004
          vf_explained_var: 0.9313921928405762
          vf_loss: 0.10705243941221354
    num_agent_steps_sampled: 1559376
    num_agent_steps_trained: 1559376
    num_steps_sampled: 1559376
    num_steps_trained: 1559376
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,156,23268.1,1559376,2.75862,12.52,-2.1,92.7798




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1569372
  custom_metrics: {}
  date: 2021-11-07_20-44-31
  done: false
  episode_len_mean: 92.3177570093458
  episode_media: {}
  episode_reward_max: 12.610000000000015
  episode_reward_mean: 2.5803738317757063
  episode_reward_min: -1.8200000000000007
  episodes_this_iter: 107
  episodes_total: 16911
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5041604313076054
          entropy_coeff: 0.01
          kl: 0.014510981628238485
          policy_loss: -0.0851977299198381
          total_loss: 0.05241042495601707
          vf_explained_var: 0.9089487195014954
          vf_loss: 0.1295919283810589
    num_agent_steps_sampled: 1569372
    num_agent_steps_trained: 1569372
    num_steps_sampled: 1569372
    num_steps_trained: 1569

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,157,23456.2,1569372,2.58037,12.61,-1.82,92.3178




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1579368
  custom_metrics: {}
  date: 2021-11-07_20-47-03
  done: false
  episode_len_mean: 93.08333333333333
  episode_media: {}
  episode_reward_max: 9.700000000000003
  episode_reward_mean: 2.3020370370370418
  episode_reward_min: -2.369999999999995
  episodes_this_iter: 108
  episodes_total: 17019
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.499976481943049
          entropy_coeff: 0.01
          kl: 0.013473195178533223
          policy_loss: -0.08527443248332821
          total_loss: 0.044042784351314236
          vf_explained_var: 0.9201496243476868
          vf_loss: 0.1236233591229424
    num_agent_steps_sampled: 1579368
    num_agent_steps_trained: 1579368
    num_steps_sampled: 1579368
    num_steps_trained: 1579

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,158,23608.8,1579368,2.30204,9.7,-2.37,93.0833




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1589364
  custom_metrics: {}
  date: 2021-11-07_20-49-35
  done: false
  episode_len_mean: 94.25471698113208
  episode_media: {}
  episode_reward_max: 10.530000000000014
  episode_reward_mean: 2.6404716981132137
  episode_reward_min: -1.5900000000000007
  episodes_this_iter: 106
  episodes_total: 17125
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5020227413911087
          entropy_coeff: 0.01
          kl: 0.014096735678353102
          policy_loss: -0.08444628791612947
          total_loss: 0.03773767793089406
          vf_explained_var: 0.9226403832435608
          vf_loss: 0.11509006553226048
    num_agent_steps_sampled: 1589364
    num_agent_steps_trained: 1589364
    num_steps_sampled: 1589364
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,159,23760.7,1589364,2.64047,10.53,-1.59,94.2547




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1599360
  custom_metrics: {}
  date: 2021-11-07_20-52-02
  done: false
  episode_len_mean: 93.87735849056604
  episode_media: {}
  episode_reward_max: 8.980000000000011
  episode_reward_mean: 1.8694339622641554
  episode_reward_min: -1.6800000000000008
  episodes_this_iter: 106
  episodes_total: 17231
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.499167907747448
          entropy_coeff: 0.01
          kl: 0.012913764113949682
          policy_loss: -0.08782014241044084
          total_loss: 0.013898968742762367
          vf_explained_var: 0.9110407829284668
          vf_loss: 0.09729162003192254
    num_agent_steps_sampled: 1599360
    num_agent_steps_trained: 1599360
    num_steps_sampled: 1599360
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,160,23907.8,1599360,1.86943,8.98,-1.68,93.8774




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1609356
  custom_metrics: {}
  date: 2021-11-07_20-54-36
  done: false
  episode_len_mean: 92.03669724770643
  episode_media: {}
  episode_reward_max: 9.790000000000003
  episode_reward_mean: 2.1825688073394547
  episode_reward_min: -1.810000000000001
  episodes_this_iter: 109
  episodes_total: 17340
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4930569257491673
          entropy_coeff: 0.01
          kl: 0.013179955896844718
          policy_loss: -0.08962078176550249
          total_loss: 0.014709140023646447
          vf_explained_var: 0.924996018409729
          vf_loss: 0.09923490236123275
    num_agent_steps_sampled: 1609356
    num_agent_steps_trained: 1609356
    num_steps_sampled: 1609356
    num_steps_trained: 160

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,161,24061.2,1609356,2.18257,9.79,-1.81,92.0367


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1619352
  custom_metrics: {}
  date: 2021-11-07_20-56-55
  done: false
  episode_len_mean: 92.35514018691589
  episode_media: {}
  episode_reward_max: 6.910000000000011
  episode_reward_mean: 2.569345794392529
  episode_reward_min: -1.5400000000000005
  episodes_this_iter: 107
  episodes_total: 17447
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4846686994927563
          entropy_coeff: 0.01
          kl: 0.014023711833313782
          policy_loss: -0.08728433306703073
          total_loss: 0.0330381511296663
          vf_explained_var: 0.9236302971839905
          vf_loss: 0.11322140129856192
    num_agent_steps_sampled: 1619352
    num_agent_steps_trained: 1619352
    num_steps_sampled: 1619352
    num_steps_trained: 1619

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,162,24200.4,1619352,2.56935,6.91,-1.54,92.3551


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1629348
  custom_metrics: {}
  date: 2021-11-07_20-59-13
  done: false
  episode_len_mean: 92.57798165137615
  episode_media: {}
  episode_reward_max: 9.140000000000013
  episode_reward_mean: 2.709724770642208
  episode_reward_min: -1.4700000000000006
  episodes_this_iter: 109
  episodes_total: 17556
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4833494579690134
          entropy_coeff: 0.01
          kl: 0.014463564881911584
          policy_loss: -0.08373361819574976
          total_loss: 0.05238650601802983
          vf_explained_var: 0.9030975699424744
          vf_loss: 0.1280038090207829
    num_agent_steps_sampled: 1629348
    num_agent_steps_trained: 1629348
    num_steps_sampled: 1629348
    num_steps_trained: 1629

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,163,24338.1,1629348,2.70972,9.14,-1.47,92.578




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1639344
  custom_metrics: {}
  date: 2021-11-07_21-02-10
  done: false
  episode_len_mean: 90.22522522522523
  episode_media: {}
  episode_reward_max: 8.270000000000016
  episode_reward_mean: 2.9801801801801866
  episode_reward_min: -1.5700000000000007
  episodes_this_iter: 111
  episodes_total: 17667
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4788466706235184
          entropy_coeff: 0.01
          kl: 0.013954577788268877
          policy_loss: -0.08251342076855975
          total_loss: 0.039585732403569496
          vf_explained_var: 0.9387529492378235
          vf_loss: 0.11509734574848643
    num_agent_steps_sampled: 1639344
    num_agent_steps_trained: 1639344
    num_steps_sampled: 1639344
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,164,24515.5,1639344,2.98018,8.27,-1.57,90.2252




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1649340
  custom_metrics: {}
  date: 2021-11-07_21-04-39
  done: false
  episode_len_mean: 94.33018867924528
  episode_media: {}
  episode_reward_max: 8.90000000000002
  episode_reward_mean: 2.264622641509439
  episode_reward_min: -1.830000000000001
  episodes_this_iter: 106
  episodes_total: 17773
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5088911570035495
          entropy_coeff: 0.01
          kl: 0.013298022888964084
          policy_loss: -0.08356824507857236
          total_loss: 0.035172594488303885
          vf_explained_var: 0.9182950854301453
          vf_loss: 0.11353519152706633
    num_agent_steps_sampled: 1649340
    num_agent_steps_trained: 1649340
    num_steps_sampled: 1649340
    num_steps_trained: 1649

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,165,24664.5,1649340,2.26462,8.9,-1.83,94.3302




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1659336
  custom_metrics: {}
  date: 2021-11-07_21-07-16
  done: false
  episode_len_mean: 93.04672897196262
  episode_media: {}
  episode_reward_max: 10.920000000000009
  episode_reward_mean: 3.1047663551401943
  episode_reward_min: -1.9200000000000008
  episodes_this_iter: 107
  episodes_total: 17880
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.486359606237493
          entropy_coeff: 0.01
          kl: 0.014472812503618739
          policy_loss: -0.08179042598582868
          total_loss: 0.05887742303709826
          vf_explained_var: 0.9274706244468689
          vf_loss: 0.13256056902325178
    num_agent_steps_sampled: 1659336
    num_agent_steps_trained: 1659336
    num_steps_sampled: 1659336
    num_steps_trained: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,166,24821.6,1659336,3.10477,10.92,-1.92,93.0467




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1669332
  custom_metrics: {}
  date: 2021-11-07_21-09-44
  done: false
  episode_len_mean: 92.6822429906542
  episode_media: {}
  episode_reward_max: 14.660000000000018
  episode_reward_mean: 2.570093457943932
  episode_reward_min: -1.6800000000000006
  episodes_this_iter: 107
  episodes_total: 17987
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.486037085810278
          entropy_coeff: 0.01
          kl: 0.013299104922459296
          policy_loss: -0.08553980101600417
          total_loss: 0.02961656991392374
          vf_explained_var: 0.9282262325286865
          vf_loss: 0.10971971694220845
    num_agent_steps_sampled: 1669332
    num_agent_steps_trained: 1669332
    num_steps_sampled: 1669332
    num_steps_trained: 1669

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,167,24969.6,1669332,2.57009,14.66,-1.68,92.6822


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1679328
  custom_metrics: {}
  date: 2021-11-07_21-12-04
  done: false
  episode_len_mean: 93.00925925925925
  episode_media: {}
  episode_reward_max: 6.990000000000011
  episode_reward_mean: 2.2958333333333383
  episode_reward_min: -1.6200000000000006
  episodes_this_iter: 108
  episodes_total: 18095
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.493429479435978
          entropy_coeff: 0.01
          kl: 0.013355876216310946
          policy_loss: -0.08454160479048634
          total_loss: 0.02791673527377793
          vf_explained_var: 0.9333418607711792
          vf_loss: 0.10696627735279692
    num_agent_steps_sampled: 1679328
    num_agent_steps_trained: 1679328
    num_steps_sampled: 1679328
    num_steps_trained: 167

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,168,25109.6,1679328,2.29583,6.99,-1.62,93.0093




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1689324
  custom_metrics: {}
  date: 2021-11-07_21-14-34
  done: false
  episode_len_mean: 95.11428571428571
  episode_media: {}
  episode_reward_max: 8.790000000000012
  episode_reward_mean: 2.551619047619054
  episode_reward_min: -1.2900000000000005
  episodes_this_iter: 105
  episodes_total: 18200
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4857177204555936
          entropy_coeff: 0.01
          kl: 0.013899246683043402
          policy_loss: -0.08370506766164659
          total_loss: 0.04731809511924019
          vf_explained_var: 0.916820228099823
          vf_loss: 0.12421611771783513
    num_agent_steps_sampled: 1689324
    num_agent_steps_trained: 1689324
    num_steps_sampled: 1689324
    num_steps_trained: 1689

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,169,25259.5,1689324,2.55162,8.79,-1.29,95.1143




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1699320
  custom_metrics: {}
  date: 2021-11-07_21-17-05
  done: false
  episode_len_mean: 94.06603773584905
  episode_media: {}
  episode_reward_max: 10.800000000000015
  episode_reward_mean: 2.7833018867924597
  episode_reward_min: -2.190000000000001
  episodes_this_iter: 106
  episodes_total: 18306
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4752386747262416
          entropy_coeff: 0.01
          kl: 0.013857001297918703
          policy_loss: -0.08190970837703755
          total_loss: 0.0506371114960211
          vf_explained_var: 0.9190250039100647
          vf_loss: 0.1257312230296178
    num_agent_steps_sampled: 1699320
    num_agent_steps_trained: 1699320
    num_steps_sampled: 1699320
    num_steps_trained: 1699

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,170,25410.2,1699320,2.7833,10.8,-2.19,94.066




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1709316
  custom_metrics: {}
  date: 2021-11-07_21-19-34
  done: false
  episode_len_mean: 94.61320754716981
  episode_media: {}
  episode_reward_max: 9.010000000000014
  episode_reward_mean: 2.913490566037743
  episode_reward_min: -1.4200000000000006
  episodes_this_iter: 106
  episodes_total: 18412
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4938581140632303
          entropy_coeff: 0.01
          kl: 0.013303552829098607
          policy_loss: -0.08260646380611464
          total_loss: 0.03762702969277007
          vf_explained_var: 0.9238666296005249
          vf_loss: 0.11486491666804267
    num_agent_steps_sampled: 1709316
    num_agent_steps_trained: 1709316
    num_steps_sampled: 1709316
    num_steps_trained: 170

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,171,25559.5,1709316,2.91349,9.01,-1.42,94.6132




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1719312
  custom_metrics: {}
  date: 2021-11-07_21-22-13
  done: false
  episode_len_mean: 93.90566037735849
  episode_media: {}
  episode_reward_max: 12.510000000000018
  episode_reward_mean: 2.494716981132081
  episode_reward_min: -1.8800000000000008
  episodes_this_iter: 106
  episodes_total: 18518
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.491127127663702
          entropy_coeff: 0.01
          kl: 0.012997578199604905
          policy_loss: -0.0838358285303554
          total_loss: 0.038345688320377956
          vf_explained_var: 0.911868155002594
          vf_loss: 0.11748267979735239
    num_agent_steps_sampled: 1719312
    num_agent_steps_trained: 1719312
    num_steps_sampled: 1719312
    num_steps_trained: 1719

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,172,25718.1,1719312,2.49472,12.51,-1.88,93.9057




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1729308
  custom_metrics: {}
  date: 2021-11-07_21-24-52
  done: false
  episode_len_mean: 94.23584905660377
  episode_media: {}
  episode_reward_max: 8.550000000000015
  episode_reward_mean: 2.16462264150944
  episode_reward_min: -1.5900000000000007
  episodes_this_iter: 106
  episodes_total: 18624
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5054053553149234
          entropy_coeff: 0.01
          kl: 0.012531929930903192
          policy_loss: -0.08476313841807791
          total_loss: 0.019460496987001252
          vf_explained_var: 0.9213221073150635
          vf_loss: 0.10072838632453583
    num_agent_steps_sampled: 1729308
    num_agent_steps_trained: 1729308
    num_steps_sampled: 1729308
    num_steps_trained: 172

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,173,25876.6,1729308,2.16462,8.55,-1.59,94.2358


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1739304
  custom_metrics: {}
  date: 2021-11-07_21-27-09
  done: false
  episode_len_mean: 95.4
  episode_media: {}
  episode_reward_max: 12.310000000000018
  episode_reward_mean: 2.4759047619047676
  episode_reward_min: -1.7500000000000009
  episodes_this_iter: 105
  episodes_total: 18729
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.496442183877668
          entropy_coeff: 0.01
          kl: 0.013701890332941034
          policy_loss: -0.08445279306262476
          total_loss: 0.03757826785246531
          vf_explained_var: 0.9124161005020142
          vf_loss: 0.11578086371589293
    num_agent_steps_sampled: 1739304
    num_agent_steps_trained: 1739304
    num_steps_sampled: 1739304
    num_steps_trained: 1739304
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,174,26013.8,1739304,2.4759,12.31,-1.75,95.4




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1749300
  custom_metrics: {}
  date: 2021-11-07_21-29-38
  done: false
  episode_len_mean: 94.5754716981132
  episode_media: {}
  episode_reward_max: 8.920000000000016
  episode_reward_mean: 2.405000000000006
  episode_reward_min: -1.770000000000001
  episodes_this_iter: 106
  episodes_total: 18835
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.497209001198793
          entropy_coeff: 0.01
          kl: 0.012401346041311077
          policy_loss: -0.08944979542468348
          total_loss: 0.0033139200053281252
          vf_explained_var: 0.9298809170722961
          vf_loss: 0.08948398683076868
    num_agent_steps_sampled: 1749300
    num_agent_steps_trained: 1749300
    num_steps_sampled: 1749300
    num_steps_trained: 1749

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,175,26162.6,1749300,2.405,8.92,-1.77,94.5755




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1759296
  custom_metrics: {}
  date: 2021-11-07_21-32-19
  done: false
  episode_len_mean: 95.26666666666667
  episode_media: {}
  episode_reward_max: 10.750000000000007
  episode_reward_mean: 2.6709523809523867
  episode_reward_min: -1.7400000000000009
  episodes_this_iter: 105
  episodes_total: 18940
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5009829095286182
          entropy_coeff: 0.01
          kl: 0.014246555622618656
          policy_loss: -0.08409645569184397
          total_loss: 0.06032980394302907
          vf_explained_var: 0.9122642874717712
          vf_loss: 0.1369806547418364
    num_agent_steps_sampled: 1759296
    num_agent_steps_trained: 1759296
    num_steps_sampled: 1759296
    num_steps_trained: 17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,176,26323.7,1759296,2.67095,10.75,-1.74,95.2667




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1769292
  custom_metrics: {}
  date: 2021-11-07_21-35-40
  done: false
  episode_len_mean: 92.83333333333333
  episode_media: {}
  episode_reward_max: 10.410000000000013
  episode_reward_mean: 2.4714814814814865
  episode_reward_min: -1.7700000000000007
  episodes_this_iter: 108
  episodes_total: 19048
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.5049342055606028
          entropy_coeff: 0.01
          kl: 0.013253675265920186
          policy_loss: -0.08043152732758695
          total_loss: 0.04346637849926821
          vf_explained_var: 0.9096168279647827
          vf_loss: 0.1187537181055826
    num_agent_steps_sampled: 1769292
    num_agent_steps_trained: 1769292
    num_steps_sampled: 1769292
    num_steps_trained: 17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,177,26524.6,1769292,2.47148,10.41,-1.77,92.8333




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1779288
  custom_metrics: {}
  date: 2021-11-07_21-38-07
  done: false
  episode_len_mean: 94.82075471698113
  episode_media: {}
  episode_reward_max: 8.850000000000014
  episode_reward_mean: 2.5637735849056673
  episode_reward_min: -1.9900000000000009
  episodes_this_iter: 106
  episodes_total: 19154
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.497721405314584
          entropy_coeff: 0.01
          kl: 0.01336934220276464
          policy_loss: -0.08569312230325662
          total_loss: 0.04093616109771224
          vf_explained_var: 0.9175637364387512
          vf_loss: 0.12114946283400059
    num_agent_steps_sampled: 1779288
    num_agent_steps_trained: 1779288
    num_steps_sampled: 1779288
    num_steps_trained: 1779

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,178,26672,1779288,2.56377,8.85,-1.99,94.8208




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1789284
  custom_metrics: {}
  date: 2021-11-07_21-40-52
  done: false
  episode_len_mean: 92.22429906542057
  episode_media: {}
  episode_reward_max: 14.820000000000013
  episode_reward_mean: 2.3332710280373883
  episode_reward_min: -1.7700000000000007
  episodes_this_iter: 107
  episodes_total: 19261
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4998962671328813
          entropy_coeff: 0.01
          kl: 0.013103294923987846
          policy_loss: -0.08159870983173068
          total_loss: 0.031305365673162874
          vf_explained_var: 0.9058298468589783
          vf_loss: 0.10805209430141581
    num_agent_steps_sampled: 1789284
    num_agent_steps_trained: 1789284
    num_steps_sampled: 1789284
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,179,26837.2,1789284,2.33327,14.82,-1.77,92.2243




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1799280
  custom_metrics: {}
  date: 2021-11-07_21-43-26
  done: false
  episode_len_mean: 93.86915887850468
  episode_media: {}
  episode_reward_max: 12.360000000000019
  episode_reward_mean: 2.6966355140186975
  episode_reward_min: -2.0699999999999994
  episodes_this_iter: 107
  episodes_total: 19368
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4980415060988856
          entropy_coeff: 0.01
          kl: 0.012931673592034513
          policy_loss: -0.08210908189479611
          total_loss: 0.048171719099180055
          vf_explained_var: 0.9185910224914551
          vf_loss: 0.12580124679475257
    num_agent_steps_sampled: 1799280
    num_agent_steps_trained: 1799280
    num_steps_sampled: 1799280
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,180,26990.4,1799280,2.69664,12.36,-2.07,93.8692




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1809276
  custom_metrics: {}
  date: 2021-11-07_21-45-58
  done: false
  episode_len_mean: 92.62037037037037
  episode_media: {}
  episode_reward_max: 9.730000000000004
  episode_reward_mean: 2.27694444444445
  episode_reward_min: -1.850000000000001
  episodes_this_iter: 108
  episodes_total: 19476
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4965605517737885
          entropy_coeff: 0.01
          kl: 0.013242299555575677
          policy_loss: -0.08351266875258113
          total_loss: 0.03198098014626238
          vf_explained_var: 0.9281731247901917
          vf_loss: 0.11029163941374828
    num_agent_steps_sampled: 1809276
    num_agent_steps_trained: 1809276
    num_steps_sampled: 1809276
    num_steps_trained: 18092

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,181,27142.2,1809276,2.27694,9.73,-1.85,92.6204




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1819272
  custom_metrics: {}
  date: 2021-11-07_21-48-50
  done: false
  episode_len_mean: 92.68518518518519
  episode_media: {}
  episode_reward_max: 12.510000000000014
  episode_reward_mean: 2.3831481481481527
  episode_reward_min: -1.7500000000000007
  episodes_this_iter: 108
  episodes_total: 19584
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4994335684001956
          entropy_coeff: 0.01
          kl: 0.012983884315811203
          policy_loss: -0.08197500161094289
          total_loss: 0.04365232595935082
          vf_explained_var: 0.9064947962760925
          vf_loss: 0.1210427506126336
    num_agent_steps_sampled: 1819272
    num_agent_steps_trained: 1819272
    num_steps_sampled: 1819272
    num_steps_trained: 18

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,182,27314.9,1819272,2.38315,12.51,-1.75,92.6852


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1829268
  custom_metrics: {}
  date: 2021-11-07_21-51-05
  done: false
  episode_len_mean: 92.92592592592592
  episode_media: {}
  episode_reward_max: 14.690000000000014
  episode_reward_mean: 2.6925000000000066
  episode_reward_min: -1.7600000000000011
  episodes_this_iter: 108
  episodes_total: 19692
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4823880929213304
          entropy_coeff: 0.01
          kl: 0.013765939347947618
          policy_loss: -0.0804736979624145
          total_loss: 0.05068903497269011
          vf_explained_var: 0.9318801760673523
          vf_loss: 0.12462608156025283
    num_agent_steps_sampled: 1829268
    num_agent_steps_trained: 1829268
    num_steps_sampled: 1829268
    num_steps_trained: 18

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,183,27450,1829268,2.6925,14.69,-1.76,92.9259




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1839264
  custom_metrics: {}
  date: 2021-11-07_21-53-35
  done: false
  episode_len_mean: 91.50925925925925
  episode_media: {}
  episode_reward_max: 8.960000000000013
  episode_reward_mean: 2.3322222222222275
  episode_reward_min: -1.97
  episodes_this_iter: 108
  episodes_total: 19800
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.478398217706599
          entropy_coeff: 0.01
          kl: 0.014325292298760669
          policy_loss: -0.08288297479351361
          total_loss: 0.0658843132476203
          vf_explained_var: 0.9184994697570801
          vf_loss: 0.14091646270587657
    num_agent_steps_sampled: 1839264
    num_agent_steps_trained: 1839264
    num_steps_sampled: 1839264
    num_steps_trained: 1839264
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,184,27599.8,1839264,2.33222,8.96,-1.97,91.5093




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1849260
  custom_metrics: {}
  date: 2021-11-07_21-56-04
  done: false
  episode_len_mean: 94.13207547169812
  episode_media: {}
  episode_reward_max: 10.120000000000017
  episode_reward_mean: 2.9200943396226484
  episode_reward_min: -1.5400000000000007
  episodes_this_iter: 106
  episodes_total: 19906
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.48120036369715
          entropy_coeff: 0.01
          kl: 0.013362941080297898
          policy_loss: -0.07989111063189995
          total_loss: 0.05719672395155216
          vf_explained_var: 0.923953652381897
          vf_loss: 0.13145738732165252
    num_agent_steps_sampled: 1849260
    num_agent_steps_trained: 1849260
    num_steps_sampled: 1849260
    num_steps_trained: 1849

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,185,27748.8,1849260,2.92009,10.12,-1.54,94.1321




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1859256
  custom_metrics: {}
  date: 2021-11-07_21-58-53
  done: false
  episode_len_mean: 91.97272727272727
  episode_media: {}
  episode_reward_max: 9.810000000000002
  episode_reward_mean: 2.463636363636369
  episode_reward_min: -1.6500000000000008
  episodes_this_iter: 110
  episodes_total: 20016
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4869230885790965
          entropy_coeff: 0.01
          kl: 0.013659398920073595
          policy_loss: -0.08259317028200906
          total_loss: 0.04225643621996427
          vf_explained_var: 0.9240238666534424
          vf_loss: 0.11860101799456738
    num_agent_steps_sampled: 1859256
    num_agent_steps_trained: 1859256
    num_steps_sampled: 1859256
    num_steps_trained: 185

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,186,27917.2,1859256,2.46364,9.81,-1.65,91.9727


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1869252
  custom_metrics: {}
  date: 2021-11-07_22-01-09
  done: false
  episode_len_mean: 93.23584905660377
  episode_media: {}
  episode_reward_max: 8.780000000000014
  episode_reward_mean: 2.4973584905660435
  episode_reward_min: -1.6900000000000008
  episodes_this_iter: 106
  episodes_total: 20122
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.478222561493898
          entropy_coeff: 0.01
          kl: 0.01349381159703376
          policy_loss: -0.0814872259282085
          total_loss: 0.03785077033277887
          vf_explained_var: 0.9361218810081482
          vf_loss: 0.11337963186729795
    num_agent_steps_sampled: 1869252
    num_agent_steps_trained: 1869252
    num_steps_sampled: 1869252
    num_steps_trained: 18692

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,187,28053.8,1869252,2.49736,8.78,-1.69,93.2358


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1879248
  custom_metrics: {}
  date: 2021-11-07_22-03-28
  done: false
  episode_len_mean: 92.91666666666667
  episode_media: {}
  episode_reward_max: 10.750000000000018
  episode_reward_mean: 2.3060185185185245
  episode_reward_min: -1.6400000000000008
  episodes_this_iter: 108
  episodes_total: 20230
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.490180923999884
          entropy_coeff: 0.01
          kl: 0.014074030814375391
          policy_loss: -0.08216667233401129
          total_loss: 0.04854108222051818
          vf_explained_var: 0.9192240834236145
          vf_loss: 0.12354716164036057
    num_agent_steps_sampled: 1879248
    num_agent_steps_trained: 1879248
    num_steps_sampled: 1879248
    num_steps_trained: 18

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,188,28192.9,1879248,2.30602,10.75,-1.64,92.9167




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1889244
  custom_metrics: {}
  date: 2021-11-07_22-06-13
  done: false
  episode_len_mean: 92.97196261682242
  episode_media: {}
  episode_reward_max: 8.830000000000016
  episode_reward_mean: 1.9543925233644914
  episode_reward_min: -2.15
  episodes_this_iter: 107
  episodes_total: 20337
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4875909611710116
          entropy_coeff: 0.01
          kl: 0.01322862519366267
          policy_loss: -0.07967934562291346
          total_loss: 0.04060463142286763
          vf_explained_var: 0.9192461967468262
          vf_loss: 0.11502342146590479
    num_agent_steps_sampled: 1889244
    num_agent_steps_trained: 1889244
    num_steps_sampled: 1889244
    num_steps_trained: 1889244
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,189,28357.2,1889244,1.95439,8.83,-2.15,92.972




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1899240
  custom_metrics: {}
  date: 2021-11-07_22-08-42
  done: false
  episode_len_mean: 93.1574074074074
  episode_media: {}
  episode_reward_max: 9.860000000000001
  episode_reward_mean: 2.7580555555555617
  episode_reward_min: -2.000000000000001
  episodes_this_iter: 108
  episodes_total: 20445
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4732870468726524
          entropy_coeff: 0.01
          kl: 0.013631111715916274
          policy_loss: -0.08354456674976227
          total_loss: 0.03970547450754123
          vf_explained_var: 0.9167192578315735
          vf_loss: 0.11692953433156905
    num_agent_steps_sampled: 1899240
    num_agent_steps_trained: 1899240
    num_steps_sampled: 1899240
    num_steps_trained: 1899

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,190,28506,1899240,2.75806,9.86,-2,93.1574




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1909236
  custom_metrics: {}
  date: 2021-11-07_22-11-09
  done: false
  episode_len_mean: 92.45871559633028
  episode_media: {}
  episode_reward_max: 10.780000000000014
  episode_reward_mean: 2.540642201834868
  episode_reward_min: -1.5600000000000007
  episodes_this_iter: 109
  episodes_total: 20554
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4872780164082844
          entropy_coeff: 0.01
          kl: 0.013190809415554498
          policy_loss: -0.08455597335297582
          total_loss: 0.03859925648977614
          vf_explained_var: 0.9229960441589355
          vf_loss: 0.11797769426758219
    num_agent_steps_sampled: 1909236
    num_agent_steps_trained: 1909236
    num_steps_sampled: 1909236
    num_steps_trained: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,191,28653.7,1909236,2.54064,10.78,-1.56,92.4587




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1919232
  custom_metrics: {}
  date: 2021-11-07_22-13-39
  done: false
  episode_len_mean: 94.33962264150944
  episode_media: {}
  episode_reward_max: 12.660000000000016
  episode_reward_mean: 2.5474528301886847
  episode_reward_min: -1.6000000000000005
  episodes_this_iter: 106
  episodes_total: 20660
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.480414307423127
          entropy_coeff: 0.01
          kl: 0.013445245636274368
          policy_loss: -0.08378128657101566
          total_loss: 0.038911850695522165
          vf_explained_var: 0.9119445085525513
          vf_loss: 0.11686732991733867
    num_agent_steps_sampled: 1919232
    num_agent_steps_trained: 1919232
    num_steps_sampled: 1919232
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,192,28803.7,1919232,2.54745,12.66,-1.6,94.3396


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1929228
  custom_metrics: {}
  date: 2021-11-07_22-15-57
  done: false
  episode_len_mean: 91.79629629629629
  episode_media: {}
  episode_reward_max: 11.010000000000012
  episode_reward_mean: 2.637407407407414
  episode_reward_min: -1.4400000000000006
  episodes_this_iter: 108
  episodes_total: 20768
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4651941242381037
          entropy_coeff: 0.01
          kl: 0.014258591140239479
          policy_loss: -0.08329790932022862
          total_loss: 0.05314183897442288
          vf_explained_var: 0.922923743724823
          vf_loss: 0.12860883705031415
    num_agent_steps_sampled: 1929228
    num_agent_steps_trained: 1929228
    num_steps_sampled: 1929228
    num_steps_trained: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,193,28941.7,1929228,2.63741,11.01,-1.44,91.7963




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1939224
  custom_metrics: {}
  date: 2021-11-07_22-18-25
  done: false
  episode_len_mean: 93.52830188679245
  episode_media: {}
  episode_reward_max: 9.88
  episode_reward_mean: 2.4334905660377415
  episode_reward_min: -2.1800000000000006
  episodes_this_iter: 106
  episodes_total: 20874
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.48040058938866
          entropy_coeff: 0.01
          kl: 0.012918982393857662
          policy_loss: -0.08560438685628594
          total_loss: 0.018435121876880144
          vf_explained_var: 0.9241398572921753
          vf_loss: 0.09941245616994734
    num_agent_steps_sampled: 1939224
    num_agent_steps_trained: 1939224
    num_steps_sampled: 1939224
    num_steps_trained: 1939224
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,194,29089.5,1939224,2.43349,9.88,-2.18,93.5283


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1949220
  custom_metrics: {}
  date: 2021-11-07_22-20-43
  done: false
  episode_len_mean: 93.55555555555556
  episode_media: {}
  episode_reward_max: 12.830000000000016
  episode_reward_mean: 2.4630555555555618
  episode_reward_min: -1.6800000000000006
  episodes_this_iter: 108
  episodes_total: 20982
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.470492130263239
          entropy_coeff: 0.01
          kl: 0.01281378116875914
          policy_loss: -0.08264486321494875
          total_loss: 0.023150325953387296
          vf_explained_var: 0.9178274869918823
          vf_loss: 0.10130871435802462
    num_agent_steps_sampled: 1949220
    num_agent_steps_trained: 1949220
    num_steps_sampled: 1949220
    num_steps_trained: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,195,29226.8,1949220,2.46306,12.83,-1.68,93.5556




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1959216
  custom_metrics: {}
  date: 2021-11-07_22-23-11
  done: false
  episode_len_mean: 92.29357798165138
  episode_media: {}
  episode_reward_max: 8.520000000000014
  episode_reward_mean: 2.48247706422019
  episode_reward_min: -1.5700000000000007
  episodes_this_iter: 109
  episodes_total: 21091
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4725735945579332
          entropy_coeff: 0.01
          kl: 0.01331884161502398
          policy_loss: -0.08060906767670033
          total_loss: 0.05388967698662836
          vf_explained_var: 0.9079403877258301
          vf_loss: 0.1288824937083464
    num_agent_steps_sampled: 1959216
    num_agent_steps_trained: 1959216
    num_steps_sampled: 1959216
    num_steps_trained: 195921

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,196,29375,1959216,2.48248,8.52,-1.57,92.2936




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1969212
  custom_metrics: {}
  date: 2021-11-07_22-25-54
  done: false
  episode_len_mean: 91.98165137614679
  episode_media: {}
  episode_reward_max: 10.980000000000013
  episode_reward_mean: 3.0389908256880807
  episode_reward_min: -1.4100000000000008
  episodes_this_iter: 109
  episodes_total: 21200
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.453698437234275
          entropy_coeff: 0.01
          kl: 0.013798074797966667
          policy_loss: -0.0777343698283737
          total_loss: 0.08057953836794338
          vf_explained_var: 0.916746973991394
          vf_loss: 0.15141715286617988
    num_agent_steps_sampled: 1969212
    num_agent_steps_trained: 1969212
    num_steps_sampled: 1969212
    num_steps_trained: 1969

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,197,29538.3,1969212,3.03899,10.98,-1.41,91.9817




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1979208
  custom_metrics: {}
  date: 2021-11-07_22-28-37
  done: false
  episode_len_mean: 90.95412844036697
  episode_media: {}
  episode_reward_max: 9.830000000000002
  episode_reward_mean: 2.5399082568807394
  episode_reward_min: -1.6300000000000006
  episodes_this_iter: 109
  episodes_total: 21309
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4666767776521863
          entropy_coeff: 0.01
          kl: 0.014066397543278193
          policy_loss: -0.08369048654905752
          total_loss: 0.0642306826610723
          vf_explained_var: 0.9032118320465088
          vf_loss: 0.14054292289890413
    num_agent_steps_sampled: 1979208
    num_agent_steps_trained: 1979208
    num_steps_sampled: 1979208
    num_steps_trained: 197

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,198,29701.3,1979208,2.53991,9.83,-1.63,90.9541


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1989204
  custom_metrics: {}
  date: 2021-11-07_22-30-53
  done: false
  episode_len_mean: 92.88785046728972
  episode_media: {}
  episode_reward_max: 10.520000000000017
  episode_reward_mean: 2.1661682242990707
  episode_reward_min: -1.890000000000001
  episodes_this_iter: 107
  episodes_total: 21416
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4556996129516864
          entropy_coeff: 0.01
          kl: 0.012551111875629859
          policy_loss: -0.08711492885700148
          total_loss: 0.025057463669496724
          vf_explained_var: 0.9135725498199463
          vf_loss: 0.10813638626669463
    num_agent_steps_sampled: 1989204
    num_agent_steps_trained: 1989204
    num_steps_sampled: 1989204
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,199,29837.5,1989204,2.16617,10.52,-1.89,92.8879




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 1999200
  custom_metrics: {}
  date: 2021-11-07_22-33-25
  done: false
  episode_len_mean: 90.33035714285714
  episode_media: {}
  episode_reward_max: 8.840000000000016
  episode_reward_mean: 2.679375000000006
  episode_reward_min: -1.6800000000000006
  episodes_this_iter: 112
  episodes_total: 21528
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4412955080342087
          entropy_coeff: 0.01
          kl: 0.014237558111562975
          policy_loss: -0.08323672758972543
          total_loss: 0.06368961046871721
          vf_explained_var: 0.9057791233062744
          vf_loss: 0.1389043561803798
    num_agent_steps_sampled: 1999200
    num_agent_steps_trained: 1999200
    num_steps_sampled: 1999200
    num_steps_trained: 1999

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,200,29989,1999200,2.67938,8.84,-1.68,90.3304




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2009196
  custom_metrics: {}
  date: 2021-11-07_22-36-07
  done: false
  episode_len_mean: 89.13636363636364
  episode_media: {}
  episode_reward_max: 12.820000000000013
  episode_reward_mean: 2.893181818181824
  episode_reward_min: -1.3900000000000006
  episodes_this_iter: 110
  episodes_total: 21638
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.432646731026152
          entropy_coeff: 0.01
          kl: 0.013266861386720891
          policy_loss: -0.08389140463346599
          total_loss: 0.04296384808472079
          vf_explained_var: 0.9189626574516296
          vf_loss: 0.12095815040823868
    num_agent_steps_sampled: 2009196
    num_agent_steps_trained: 2009196
    num_steps_sampled: 2009196
    num_steps_trained: 200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,201,30150.6,2009196,2.89318,12.82,-1.39,89.1364




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2019192
  custom_metrics: {}
  date: 2021-11-07_22-38-55
  done: false
  episode_len_mean: 91.34234234234235
  episode_media: {}
  episode_reward_max: 12.710000000000019
  episode_reward_mean: 2.8685585585585653
  episode_reward_min: -1.900000000000001
  episodes_this_iter: 111
  episodes_total: 21749
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4340811167007836
          entropy_coeff: 0.01
          kl: 0.013145745956753928
          policy_loss: -0.08470721060298701
          total_loss: 0.0386413853830443
          vf_explained_var: 0.9280899167060852
          vf_loss: 0.1177417530813533
    num_agent_steps_sampled: 2019192
    num_agent_steps_trained: 2019192
    num_steps_sampled: 2019192
    num_steps_trained: 2019

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,202,30319.1,2019192,2.86856,12.71,-1.9,91.3423




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2029188
  custom_metrics: {}
  date: 2021-11-07_22-41-30
  done: false
  episode_len_mean: 89.04504504504504
  episode_media: {}
  episode_reward_max: 10.640000000000017
  episode_reward_mean: 2.88360360360361
  episode_reward_min: -1.7400000000000009
  episodes_this_iter: 111
  episodes_total: 21860
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4249969724915985
          entropy_coeff: 0.01
          kl: 0.013204541109564791
          policy_loss: -0.08600559530572759
          total_loss: 0.035291551359188864
          vf_explained_var: 0.9352210164070129
          vf_loss: 0.11546552089783244
    num_agent_steps_sampled: 2029188
    num_agent_steps_trained: 2029188
    num_steps_sampled: 2029188
    num_steps_trained: 20

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,203,30474.2,2029188,2.8836,10.64,-1.74,89.045




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2039184
  custom_metrics: {}
  date: 2021-11-07_22-44-03
  done: false
  episode_len_mean: 90.83783783783784
  episode_media: {}
  episode_reward_max: 9.080000000000013
  episode_reward_mean: 2.583603603603609
  episode_reward_min: -1.5500000000000007
  episodes_this_iter: 111
  episodes_total: 21971
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4271342412019385
          entropy_coeff: 0.01
          kl: 0.013669024650101167
          policy_loss: -0.08379512170059049
          total_loss: 0.04042321679333591
          vf_explained_var: 0.9259775876998901
          vf_loss: 0.11734993286335316
    num_agent_steps_sampled: 2039184
    num_agent_steps_trained: 2039184
    num_steps_sampled: 2039184
    num_steps_trained: 203

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,204,30626.6,2039184,2.5836,9.08,-1.55,90.8378




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2049180
  custom_metrics: {}
  date: 2021-11-07_22-46-31
  done: false
  episode_len_mean: 91.92660550458716
  episode_media: {}
  episode_reward_max: 10.830000000000013
  episode_reward_mean: 2.8189908256880782
  episode_reward_min: -2.04
  episodes_this_iter: 109
  episodes_total: 22080
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4393403691104334
          entropy_coeff: 0.01
          kl: 0.01420109201575802
          policy_loss: -0.07825294896068737
          total_loss: 0.07698127779966364
          vf_explained_var: 0.9042829871177673
          vf_loss: 0.14727576605291065
    num_agent_steps_sampled: 2049180
    num_agent_steps_trained: 2049180
    num_steps_sampled: 2049180
    num_steps_trained: 2049180
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,205,30774.5,2049180,2.81899,10.83,-2.04,91.9266


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2059176
  custom_metrics: {}
  date: 2021-11-07_22-48-49
  done: false
  episode_len_mean: 91.6697247706422
  episode_media: {}
  episode_reward_max: 8.830000000000014
  episode_reward_mean: 2.591834862385328
  episode_reward_min: -1.990000000000001
  episodes_this_iter: 109
  episodes_total: 22189
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4383618919258443
          entropy_coeff: 0.01
          kl: 0.014206329791655944
          policy_loss: -0.08034616080232156
          total_loss: 0.06386735250488815
          vf_explained_var: 0.898929238319397
          vf_loss: 0.13623333629857526
    num_agent_steps_sampled: 2059176
    num_agent_steps_trained: 2059176
    num_steps_sampled: 2059176
    num_steps_trained: 205917

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,206,30912.3,2059176,2.59183,8.83,-1.99,91.6697




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2069172
  custom_metrics: {}
  date: 2021-11-07_22-52-20
  done: false
  episode_len_mean: 87.30701754385964
  episode_media: {}
  episode_reward_max: 9.850000000000001
  episode_reward_mean: 2.7931578947368476
  episode_reward_min: -1.820000000000001
  episodes_this_iter: 114
  episodes_total: 22303
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4289880351123645
          entropy_coeff: 0.01
          kl: 0.01321778632280097
          policy_loss: -0.08177710978839642
          total_loss: 0.05385889108491759
          vf_explained_var: 0.9164316058158875
          vf_loss: 0.12981411098128456
    num_agent_steps_sampled: 2069172
    num_agent_steps_trained: 2069172
    num_steps_sampled: 2069172
    num_steps_trained: 2069

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,207,31123.6,2069172,2.79316,9.85,-1.82,87.307




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2079168
  custom_metrics: {}
  date: 2021-11-07_22-55-31
  done: false
  episode_len_mean: 89.13392857142857
  episode_media: {}
  episode_reward_max: 11.08000000000001
  episode_reward_mean: 2.7214285714285773
  episode_reward_min: -1.7200000000000009
  episodes_this_iter: 112
  episodes_total: 22415
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.421975537854382
          entropy_coeff: 0.01
          kl: 0.01324625999713596
          policy_loss: -0.07952383758229578
          total_loss: 0.04261264558045719
          vf_explained_var: 0.9257248044013977
          vf_loss: 0.1161796016463389
    num_agent_steps_sampled: 2079168
    num_agent_steps_trained: 2079168
    num_steps_sampled: 2079168
    num_steps_trained: 20791

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,208,31314.3,2079168,2.72143,11.08,-1.72,89.1339




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2089164
  custom_metrics: {}
  date: 2021-11-07_22-58-36
  done: false
  episode_len_mean: 90.04504504504504
  episode_media: {}
  episode_reward_max: 12.830000000000014
  episode_reward_mean: 2.5021621621621675
  episode_reward_min: -1.7100000000000009
  episodes_this_iter: 111
  episodes_total: 22526
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.425171137263632
          entropy_coeff: 0.01
          kl: 0.013114875562490327
          policy_loss: -0.08692511398440753
          total_loss: 0.02457035875552867
          vf_explained_var: 0.9296663403511047
          vf_loss: 0.10586985778222736
    num_agent_steps_sampled: 2089164
    num_agent_steps_trained: 2089164
    num_steps_sampled: 2089164
    num_steps_trained: 20

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,209,31499.9,2089164,2.50216,12.83,-1.71,90.045




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2099160
  custom_metrics: {}
  date: 2021-11-07_23-01-12
  done: false
  episode_len_mean: 90.33333333333333
  episode_media: {}
  episode_reward_max: 11.010000000000018
  episode_reward_mean: 2.897297297297304
  episode_reward_min: -1.7300000000000006
  episodes_this_iter: 111
  episodes_total: 22637
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4199124965912255
          entropy_coeff: 0.01
          kl: 0.0132829875829255
          policy_loss: -0.08556915233787309
          total_loss: 0.03331526176462698
          vf_explained_var: 0.9272594451904297
          vf_loss: 0.1128232328253042
    num_agent_steps_sampled: 2099160
    num_agent_steps_trained: 2099160
    num_steps_sampled: 2099160
    num_steps_trained: 20991

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,210,31655.2,2099160,2.8973,11.01,-1.73,90.3333




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2109156
  custom_metrics: {}
  date: 2021-11-07_23-03-55
  done: false
  episode_len_mean: 88.0
  episode_media: {}
  episode_reward_max: 13.290000000000013
  episode_reward_mean: 2.7593805309734574
  episode_reward_min: -1.5300000000000005
  episodes_this_iter: 113
  episodes_total: 22750
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4219688484811375
          entropy_coeff: 0.01
          kl: 0.013384187758276362
          policy_loss: -0.08047838811086029
          total_loss: 0.04059246451331255
          vf_explained_var: 0.9223693609237671
          vf_loss: 0.114799687338786
    num_agent_steps_sampled: 2109156
    num_agent_steps_trained: 2109156
    num_steps_sampled: 2109156
    num_steps_trained: 2109156
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,211,31819,2109156,2.75938,13.29,-1.53,88




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2119152
  custom_metrics: {}
  date: 2021-11-07_23-06-31
  done: false
  episode_len_mean: 89.33628318584071
  episode_media: {}
  episode_reward_max: 14.690000000000017
  episode_reward_mean: 2.5025663716814206
  episode_reward_min: -1.8200000000000007
  episodes_this_iter: 113
  episodes_total: 22863
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.431667758868291
          entropy_coeff: 0.01
          kl: 0.012448573443212225
          policy_loss: -0.08492764939959997
          total_loss: 0.02323770373263675
          vf_explained_var: 0.9244449734687805
          vf_loss: 0.10412262301876123
    num_agent_steps_sampled: 2119152
    num_agent_steps_trained: 2119152
    num_steps_sampled: 2119152
    num_steps_trained: 21

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,212,31974.4,2119152,2.50257,14.69,-1.82,89.3363


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2129148
  custom_metrics: {}
  date: 2021-11-07_23-08-53
  done: false
  episode_len_mean: 88.72321428571429
  episode_media: {}
  episode_reward_max: 11.240000000000013
  episode_reward_mean: 2.7633035714285774
  episode_reward_min: -1.6300000000000008
  episodes_this_iter: 112
  episodes_total: 22975
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4159343322118123
          entropy_coeff: 0.01
          kl: 0.012885501794904256
          policy_loss: -0.08312856007335533
          total_loss: 0.03227042329823996
          vf_explained_var: 0.9275467991828918
          vf_loss: 0.11020354198403338
    num_agent_steps_sampled: 2129148
    num_agent_steps_trained: 2129148
    num_steps_sampled: 2129148
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,213,32116.4,2129148,2.7633,11.24,-1.63,88.7232




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2139144
  custom_metrics: {}
  date: 2021-11-07_23-11-29
  done: false
  episode_len_mean: 88.60176991150442
  episode_media: {}
  episode_reward_max: 14.930000000000012
  episode_reward_mean: 2.756371681415936
  episode_reward_min: -1.5900000000000007
  episodes_this_iter: 113
  episodes_total: 23088
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.421661371247381
          entropy_coeff: 0.01
          kl: 0.012866189527687111
          policy_loss: -0.08258165072999958
          total_loss: 0.033234769221331575
          vf_explained_var: 0.9276599884033203
          vf_loss: 0.11072224428017552
    num_agent_steps_sampled: 2139144
    num_agent_steps_trained: 2139144
    num_steps_sampled: 2139144
    num_steps_trained: 21

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,214,32272.4,2139144,2.75637,14.93,-1.59,88.6018




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2149140
  custom_metrics: {}
  date: 2021-11-07_23-14-02
  done: false
  episode_len_mean: 89.98198198198199
  episode_media: {}
  episode_reward_max: 9.790000000000003
  episode_reward_mean: 2.8933333333333398
  episode_reward_min: -1.6400000000000008
  episodes_this_iter: 111
  episodes_total: 23199
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.423554398259546
          entropy_coeff: 0.01
          kl: 0.012771939072397091
          policy_loss: -0.0805169635046369
          total_loss: 0.038291788715709985
          vf_explained_var: 0.9208767414093018
          vf_loss: 0.11394822071823808
    num_agent_steps_sampled: 2149140
    num_agent_steps_trained: 2149140
    num_steps_sampled: 2149140
    num_steps_trained: 214

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,215,32425.1,2149140,2.89333,9.79,-1.64,89.982




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2159136
  custom_metrics: {}
  date: 2021-11-07_23-16-47
  done: false
  episode_len_mean: 87.96460176991151
  episode_media: {}
  episode_reward_max: 12.650000000000016
  episode_reward_mean: 3.0870796460177052
  episode_reward_min: -1.3700000000000006
  episodes_this_iter: 113
  episodes_total: 23312
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4122575578526555
          entropy_coeff: 0.01
          kl: 0.013264917714080279
          policy_loss: -0.08296794991176097
          total_loss: 0.04605525458215648
          vf_explained_var: 0.9286439418792725
          vf_loss: 0.12292663884611849
    num_agent_steps_sampled: 2159136
    num_agent_steps_trained: 2159136
    num_steps_sampled: 2159136
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,216,32590.7,2159136,3.08708,12.65,-1.37,87.9646




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2169132
  custom_metrics: {}
  date: 2021-11-07_23-19-25
  done: false
  episode_len_mean: 88.21929824561404
  episode_media: {}
  episode_reward_max: 9.050000000000013
  episode_reward_mean: 2.868245614035094
  episode_reward_min: -1.2900000000000005
  episodes_this_iter: 114
  episodes_total: 23426
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.399631421178834
          entropy_coeff: 0.01
          kl: 0.013619638698423205
          policy_loss: -0.08246253994492511
          total_loss: 0.047397060025260486
          vf_explained_var: 0.9287212491035461
          vf_loss: 0.12282867308260284
    num_agent_steps_sampled: 2169132
    num_agent_steps_trained: 2169132
    num_steps_sampled: 2169132
    num_steps_trained: 216

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,217,32748.4,2169132,2.86825,9.05,-1.29,88.2193




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2179128
  custom_metrics: {}
  date: 2021-11-07_23-22-20
  done: false
  episode_len_mean: 88.90090090090091
  episode_media: {}
  episode_reward_max: 11.170000000000012
  episode_reward_mean: 3.2219819819819886
  episode_reward_min: -1.6300000000000008
  episodes_this_iter: 111
  episodes_total: 23537
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3896371168968007
          entropy_coeff: 0.01
          kl: 0.014100093666240885
          policy_loss: -0.08305644752760219
          total_loss: 0.05995728182248198
          vf_explained_var: 0.9278818964958191
          vf_loss: 0.13478832351218942
    num_agent_steps_sampled: 2179128
    num_agent_steps_trained: 2179128
    num_steps_sampled: 2179128
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,218,32923.4,2179128,3.22198,11.17,-1.63,88.9009




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2189124
  custom_metrics: {}
  date: 2021-11-07_23-25-10
  done: false
  episode_len_mean: 88.04347826086956
  episode_media: {}
  episode_reward_max: 15.100000000000016
  episode_reward_mean: 2.9966956521739183
  episode_reward_min: -1.6300000000000006
  episodes_this_iter: 115
  episodes_total: 23652
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.394678037798303
          entropy_coeff: 0.01
          kl: 0.01410589935626363
          policy_loss: -0.07725582011044026
          total_loss: 0.07793006231196416
          vf_explained_var: 0.9190701246261597
          vf_loss: 0.14699766056723573
    num_agent_steps_sampled: 2189124
    num_agent_steps_trained: 2189124
    num_steps_sampled: 2189124
    num_steps_trained: 218

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,219,33093.8,2189124,2.9967,15.1,-1.63,88.0435




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2199120
  custom_metrics: {}
  date: 2021-11-07_23-27-48
  done: false
  episode_len_mean: 87.31304347826087
  episode_media: {}
  episode_reward_max: 10.670000000000016
  episode_reward_mean: 3.0933913043478327
  episode_reward_min: -1.6200000000000008
  episodes_this_iter: 115
  episodes_total: 23767
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3846114637505296
          entropy_coeff: 0.01
          kl: 0.014280239277918662
          policy_loss: -0.08118791078519809
          total_loss: 0.06543801742661585
          vf_explained_var: 0.9241780638694763
          vf_loss: 0.13793987111212352
    num_agent_steps_sampled: 2199120
    num_agent_steps_trained: 2199120
    num_steps_sampled: 2199120
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,220,33250.8,2199120,3.09339,10.67,-1.62,87.313


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2209116
  custom_metrics: {}
  date: 2021-11-07_23-30-10
  done: false
  episode_len_mean: 90.44954128440367
  episode_media: {}
  episode_reward_max: 12.310000000000016
  episode_reward_mean: 3.0260550458715665
  episode_reward_min: -1.5600000000000005
  episodes_this_iter: 109
  episodes_total: 23876
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.4041725077180782
          entropy_coeff: 0.01
          kl: 0.013387728774380415
          policy_loss: -0.08432606907927583
          total_loss: 0.046416815558177796
          vf_explained_var: 0.9218944311141968
          vf_loss: 0.1242856886731381
    num_agent_steps_sampled: 2209116
    num_agent_steps_trained: 2209116
    num_steps_sampled: 2209116
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,221,33393.1,2209116,3.02606,12.31,-1.56,90.4495




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2219112
  custom_metrics: {}
  date: 2021-11-07_23-32-46
  done: false
  episode_len_mean: 90.10714285714286
  episode_media: {}
  episode_reward_max: 10.390000000000017
  episode_reward_mean: 3.044196428571435
  episode_reward_min: -2.060000000000001
  episodes_this_iter: 112
  episodes_total: 23988
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.394341374054933
          entropy_coeff: 0.01
          kl: 0.012726435035454632
          policy_loss: -0.0853504970876707
          total_loss: 0.030381765313701244
          vf_explained_var: 0.9268773794174194
          vf_loss: 0.11068326431111647
    num_agent_steps_sampled: 2219112
    num_agent_steps_trained: 2219112
    num_steps_sampled: 2219112
    num_steps_trained: 2219

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,222,33548.8,2219112,3.0442,10.39,-2.06,90.1071




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2229108
  custom_metrics: {}
  date: 2021-11-07_23-35-19
  done: false
  episode_len_mean: 90.34234234234235
  episode_media: {}
  episode_reward_max: 12.810000000000016
  episode_reward_mean: 2.853333333333339
  episode_reward_min: -1.990000000000001
  episodes_this_iter: 111
  episodes_total: 24099
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.407589753265055
          entropy_coeff: 0.01
          kl: 0.013830805523344319
          policy_loss: -0.08537161463043756
          total_loss: 0.04543645109535537
          vf_explained_var: 0.9230453372001648
          vf_loss: 0.12337565811541983
    num_agent_steps_sampled: 2229108
    num_agent_steps_trained: 2229108
    num_steps_sampled: 2229108
    num_steps_trained: 2229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,223,33702.1,2229108,2.85333,12.81,-1.99,90.3423




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2239104
  custom_metrics: {}
  date: 2021-11-07_23-37-51
  done: false
  episode_len_mean: 90.63302752293578
  episode_media: {}
  episode_reward_max: 10.310000000000016
  episode_reward_mean: 2.45550458715597
  episode_reward_min: -1.7099999999999942
  episodes_this_iter: 109
  episodes_total: 24208
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.40388445263235
          entropy_coeff: 0.01
          kl: 0.013458997171704064
          policy_loss: -0.08324952856120137
          total_loss: 0.03628784605405397
          vf_explained_var: 0.9250745177268982
          vf_loss: 0.11291494032479504
    num_agent_steps_sampled: 2239104
    num_agent_steps_trained: 2239104
    num_steps_sampled: 2239104
    num_steps_trained: 22391

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,224,33853.8,2239104,2.4555,10.31,-1.71,90.633




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2249100
  custom_metrics: {}
  date: 2021-11-07_23-40-23
  done: false
  episode_len_mean: 89.45535714285714
  episode_media: {}
  episode_reward_max: 9.160000000000013
  episode_reward_mean: 3.201428571428579
  episode_reward_min: -1.960000000000001
  episodes_this_iter: 112
  episodes_total: 24320
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3820755532664113
          entropy_coeff: 0.01
          kl: 0.013474978417220604
          policy_loss: -0.08287762166916306
          total_loss: 0.04309471701072831
          vf_explained_var: 0.9399086236953735
          vf_loss: 0.11909540862354458
    num_agent_steps_sampled: 2249100
    num_agent_steps_trained: 2249100
    num_steps_sampled: 2249100
    num_steps_trained: 2249

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,225,34005.7,2249100,3.20143,9.16,-1.96,89.4554




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2259096
  custom_metrics: {}
  date: 2021-11-07_23-42-58
  done: false
  episode_len_mean: 89.14159292035399
  episode_media: {}
  episode_reward_max: 10.790000000000013
  episode_reward_mean: 2.9366371681416
  episode_reward_min: -1.640000000000001
  episodes_this_iter: 113
  episodes_total: 24433
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3876679583492444
          entropy_coeff: 0.01
          kl: 0.013503344914972834
          policy_loss: -0.08464182576435245
          total_loss: 0.040586585677268666
          vf_explained_var: 0.936714768409729
          vf_loss: 0.11834278514799781
    num_agent_steps_sampled: 2259096
    num_agent_steps_trained: 2259096
    num_steps_sampled: 2259096
    num_steps_trained: 22590

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,226,34160.5,2259096,2.93664,10.79,-1.64,89.1416




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2269092
  custom_metrics: {}
  date: 2021-11-07_23-46-06
  done: false
  episode_len_mean: 86.56896551724138
  episode_media: {}
  episode_reward_max: 11.070000000000007
  episode_reward_mean: 3.248620689655179
  episode_reward_min: -1.9700000000000009
  episodes_this_iter: 116
  episodes_total: 24549
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.377033751235049
          entropy_coeff: 0.01
          kl: 0.01370186157619514
          policy_loss: -0.08108604382245968
          total_loss: 0.06488989037580979
          vf_explained_var: 0.9226114153862
          vf_loss: 0.13853171796848376
    num_agent_steps_sampled: 2269092
    num_agent_steps_trained: 2269092
    num_steps_sampled: 2269092
    num_steps_trained: 2269092

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,227,34349.2,2269092,3.24862,11.07,-1.97,86.569




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2279088
  custom_metrics: {}
  date: 2021-11-07_23-48-44
  done: false
  episode_len_mean: 88.00884955752213
  episode_media: {}
  episode_reward_max: 10.650000000000016
  episode_reward_mean: 3.343274336283193
  episode_reward_min: -1.4700000000000006
  episodes_this_iter: 113
  episodes_total: 24662
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.384485812880035
          entropy_coeff: 0.01
          kl: 0.013746053110921173
          policy_loss: -0.0839869175854529
          total_loss: 0.04936821626610736
          vf_explained_var: 0.9346886277198792
          vf_loss: 0.1258847642594423
    num_agent_steps_sampled: 2279088
    num_agent_steps_trained: 2279088
    num_steps_sampled: 2279088
    num_steps_trained: 22790

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,228,34506.6,2279088,3.34327,10.65,-1.47,88.0088




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2289084
  custom_metrics: {}
  date: 2021-11-07_23-51-52
  done: false
  episode_len_mean: 88.40707964601769
  episode_media: {}
  episode_reward_max: 11.020000000000014
  episode_reward_mean: 3.6395575221239023
  episode_reward_min: -1.4200000000000004
  episodes_this_iter: 113
  episodes_total: 24775
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3576839330868844
          entropy_coeff: 0.01
          kl: 0.0136889501531214
          policy_loss: -0.08305489772882982
          total_loss: 0.048104871669386186
          vf_explained_var: 0.943844735622406
          vf_loss: 0.12355146749725199
    num_agent_steps_sampled: 2289084
    num_agent_steps_trained: 2289084
    num_steps_sampled: 2289084
    num_steps_trained: 228

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,229,34694.7,2289084,3.63956,11.02,-1.42,88.4071


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2299080
  custom_metrics: {}
  date: 2021-11-07_23-54-14
  done: false
  episode_len_mean: 89.50892857142857
  episode_media: {}
  episode_reward_max: 14.550000000000017
  episode_reward_mean: 3.15785714285715
  episode_reward_min: -1.9400000000000008
  episodes_this_iter: 112
  episodes_total: 24887
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3759879605382936
          entropy_coeff: 0.01
          kl: 0.013375503554459019
          policy_loss: -0.08223243140352843
          total_loss: 0.04723051572138937
          vf_explained_var: 0.934721827507019
          vf_loss: 0.12275175644228091
    num_agent_steps_sampled: 2299080
    num_agent_steps_trained: 2299080
    num_steps_sampled: 2299080
    num_steps_trained: 2299

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,230,34836.7,2299080,3.15786,14.55,-1.94,89.5089




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2309076
  custom_metrics: {}
  date: 2021-11-07_23-56-52
  done: false
  episode_len_mean: 87.58407079646018
  episode_media: {}
  episode_reward_max: 10.570000000000016
  episode_reward_mean: 2.8202654867256705
  episode_reward_min: -1.7500000000000009
  episodes_this_iter: 113
  episodes_total: 25000
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.381718896800636
          entropy_coeff: 0.01
          kl: 0.01319412683721822
          policy_loss: -0.0828968358783322
          total_loss: 0.032349580610728165
          vf_explained_var: 0.924372136592865
          vf_loss: 0.10900573260509051
    num_agent_steps_sampled: 2309076
    num_agent_steps_trained: 2309076
    num_steps_sampled: 2309076
    num_steps_trained: 2309

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,231,34994.9,2309076,2.82027,10.57,-1.75,87.5841




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2319072
  custom_metrics: {}
  date: 2021-11-07_23-59-43
  done: false
  episode_len_mean: 89.0625
  episode_media: {}
  episode_reward_max: 9.860000000000001
  episode_reward_mean: 2.983035714285721
  episode_reward_min: -1.7600000000000007
  episodes_this_iter: 112
  episodes_total: 25112
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.393980205568493
          entropy_coeff: 0.01
          kl: 0.013574260351931305
          policy_loss: -0.08300689376381218
          total_loss: 0.04918343280083858
          vf_explained_var: 0.9224579334259033
          vf_loss: 0.1252062648662135
    num_agent_steps_sampled: 2319072
    num_agent_steps_trained: 2319072
    num_steps_sampled: 2319072
    num_steps_trained: 2319072
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,232,35165.8,2319072,2.98304,9.86,-1.76,89.0625




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2329068
  custom_metrics: {}
  date: 2021-11-08_00-02-26
  done: false
  episode_len_mean: 88.93805309734513
  episode_media: {}
  episode_reward_max: 10.700000000000014
  episode_reward_mean: 3.009911504424786
  episode_reward_min: -1.6100000000000008
  episodes_this_iter: 113
  episodes_total: 25225
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3923714242429814
          entropy_coeff: 0.01
          kl: 0.014613870444713599
          policy_loss: -0.07793599477547229
          total_loss: 0.05117720916994616
          vf_explained_var: 0.9286001324653625
          vf_loss: 0.11974469303416135
    num_agent_steps_sampled: 2329068
    num_agent_steps_trained: 2329068
    num_steps_sampled: 2329068
    num_steps_trained: 23

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,233,35328.6,2329068,3.00991,10.7,-1.61,88.9381




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2339064
  custom_metrics: {}
  date: 2021-11-08_00-05-19
  done: false
  episode_len_mean: 91.26363636363637
  episode_media: {}
  episode_reward_max: 9.190000000000012
  episode_reward_mean: 2.7405454545454613
  episode_reward_min: -1.7500000000000009
  episodes_this_iter: 110
  episodes_total: 25335
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3998292503193914
          entropy_coeff: 0.01
          kl: 0.013362683813476281
          policy_loss: -0.08489182373811292
          total_loss: 0.032122795240810284
          vf_explained_var: 0.9247320890426636
          vf_loss: 0.11057104621257664
    num_agent_steps_sampled: 2339064
    num_agent_steps_trained: 2339064
    num_steps_sampled: 2339064
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,234,35502.1,2339064,2.74055,9.19,-1.75,91.2636




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2349060
  custom_metrics: {}
  date: 2021-11-08_00-07-56
  done: false
  episode_len_mean: 90.38532110091744
  episode_media: {}
  episode_reward_max: 9.430000000000009
  episode_reward_mean: 3.0370642201834923
  episode_reward_min: -1.3200000000000003
  episodes_this_iter: 109
  episodes_total: 25444
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3913690444750664
          entropy_coeff: 0.01
          kl: 0.013217811902140393
          policy_loss: -0.083203156470743
          total_loss: 0.04064760210040288
          vf_explained_var: 0.9220372438430786
          vf_loss: 0.11765261951069801
    num_agent_steps_sampled: 2349060
    num_agent_steps_trained: 2349060
    num_steps_sampled: 2349060
    num_steps_trained: 2349

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,235,35658.4,2349060,3.03706,9.43,-1.32,90.3853




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2359056
  custom_metrics: {}
  date: 2021-11-08_00-10-34
  done: false
  episode_len_mean: 90.16814159292035
  episode_media: {}
  episode_reward_max: 10.710000000000013
  episode_reward_mean: 3.0458407079646093
  episode_reward_min: -2.0699999999999994
  episodes_this_iter: 113
  episodes_total: 25557
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.382973528927208
          entropy_coeff: 0.01
          kl: 0.013054754264458762
          policy_loss: -0.08221659087695372
          total_loss: 0.04272654817487376
          vf_explained_var: 0.9286047220230103
          vf_loss: 0.1190325113108907
    num_agent_steps_sampled: 2359056
    num_agent_steps_trained: 2359056
    num_steps_sampled: 2359056
    num_steps_trained: 235

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,236,35816.9,2359056,3.04584,10.71,-2.07,90.1681




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2369052
  custom_metrics: {}
  date: 2021-11-08_00-13-20
  done: false
  episode_len_mean: 89.9
  episode_media: {}
  episode_reward_max: 12.640000000000013
  episode_reward_mean: 3.095181818181825
  episode_reward_min: -2.5300000000000007
  episodes_this_iter: 110
  episodes_total: 25667
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.389985664278014
          entropy_coeff: 0.01
          kl: 0.013867084654004306
          policy_loss: -0.08293770703717938
          total_loss: 0.051845049852521245
          vf_explained_var: 0.9222933650016785
          vf_loss: 0.12709165998997216
    num_agent_steps_sampled: 2369052
    num_agent_steps_trained: 2369052
    num_steps_sampled: 2369052
    num_steps_trained: 2369052
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,237,35982.9,2369052,3.09518,12.64,-2.53,89.9




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2379048
  custom_metrics: {}
  date: 2021-11-08_00-16-05
  done: false
  episode_len_mean: 91.05454545454545
  episode_media: {}
  episode_reward_max: 12.190000000000014
  episode_reward_mean: 3.2118181818181903
  episode_reward_min: -1.8000000000000007
  episodes_this_iter: 110
  episodes_total: 25777
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3533908118549576
          entropy_coeff: 0.01
          kl: 0.013074791845927271
          policy_loss: -0.08161853054602049
          total_loss: 0.034925814548459576
          vf_explained_var: 0.9384793639183044
          vf_loss: 0.1102922436216066
    num_agent_steps_sampled: 2379048
    num_agent_steps_trained: 2379048
    num_steps_sampled: 2379048
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,238,36147.3,2379048,3.21182,12.19,-1.8,91.0545




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2389044
  custom_metrics: {}
  date: 2021-11-08_00-18-40
  done: false
  episode_len_mean: 89.17857142857143
  episode_media: {}
  episode_reward_max: 11.210000000000013
  episode_reward_mean: 3.3482142857142927
  episode_reward_min: -1.4600000000000006
  episodes_this_iter: 112
  episodes_total: 25889
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3583160681602284
          entropy_coeff: 0.01
          kl: 0.012927454168654267
          policy_loss: -0.08027090525302367
          total_loss: 0.033822774115758826
          vf_explained_var: 0.9432406425476074
          vf_loss: 0.10822648257415136
    num_agent_steps_sampled: 2389044
    num_agent_steps_trained: 2389044
    num_steps_sampled: 2389044
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,239,36302,2389044,3.34821,11.21,-1.46,89.1786




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2399040
  custom_metrics: {}
  date: 2021-11-08_00-21-26
  done: false
  episode_len_mean: 89.05357142857143
  episode_media: {}
  episode_reward_max: 11.060000000000013
  episode_reward_mean: 3.2462500000000074
  episode_reward_min: -1.690000000000001
  episodes_this_iter: 112
  episodes_total: 26001
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.367945357876965
          entropy_coeff: 0.01
          kl: 0.013828114818054993
          policy_loss: -0.0822649980075339
          total_loss: 0.04144268870098978
          vf_explained_var: 0.9394233822822571
          vf_loss: 0.11588496625598552
    num_agent_steps_sampled: 2399040
    num_agent_steps_trained: 2399040
    num_steps_sampled: 2399040
    num_steps_trained: 2399

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,240,36468.8,2399040,3.24625,11.06,-1.69,89.0536




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2409036
  custom_metrics: {}
  date: 2021-11-08_00-24-14
  done: false
  episode_len_mean: 89.61607142857143
  episode_media: {}
  episode_reward_max: 13.120000000000012
  episode_reward_mean: 3.4716964285714362
  episode_reward_min: -1.3600000000000005
  episodes_this_iter: 112
  episodes_total: 26113
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3684270090527004
          entropy_coeff: 0.01
          kl: 0.014107696066816484
          policy_loss: -0.07750006488436817
          total_loss: 0.062364077193933165
          vf_explained_var: 0.9225014448165894
          vf_loss: 0.1314093169923394
    num_agent_steps_sampled: 2409036
    num_agent_steps_trained: 2409036
    num_steps_sampled: 2409036
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,241,36636.6,2409036,3.4717,13.12,-1.36,89.6161


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2419032
  custom_metrics: {}
  date: 2021-11-08_00-26-31
  done: false
  episode_len_mean: 92.32407407407408
  episode_media: {}
  episode_reward_max: 12.150000000000016
  episode_reward_mean: 2.815462962962971
  episode_reward_min: -2.000000000000001
  episodes_this_iter: 108
  episodes_total: 26221
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3851566361565877
          entropy_coeff: 0.01
          kl: 0.012720692789492938
          policy_loss: -0.08382953648320121
          total_loss: 0.036500998248911315
          vf_explained_var: 0.9358655214309692
          vf_loss: 0.11520277216600684
    num_agent_steps_sampled: 2419032
    num_agent_steps_trained: 2419032
    num_steps_sampled: 2419032
    num_steps_trained: 24

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,242,36773.5,2419032,2.81546,12.15,-2,92.3241




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2429028
  custom_metrics: {}
  date: 2021-11-08_00-29-16
  done: false
  episode_len_mean: 89.125
  episode_media: {}
  episode_reward_max: 9.710000000000003
  episode_reward_mean: 2.9535714285714363
  episode_reward_min: -2.289999999999998
  episodes_this_iter: 112
  episodes_total: 26333
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3737765418158636
          entropy_coeff: 0.01
          kl: 0.01298311577786199
          policy_loss: -0.08139312729303144
          total_loss: 0.04335817000072481
          vf_explained_var: 0.9199544191360474
          vf_loss: 0.11891190130183966
    num_agent_steps_sampled: 2429028
    num_agent_steps_trained: 2429028
    num_steps_sampled: 2429028
    num_steps_trained: 2429028
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,243,36938.3,2429028,2.95357,9.71,-2.29,89.125




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2439024
  custom_metrics: {}
  date: 2021-11-08_00-31-51
  done: false
  episode_len_mean: 89.36607142857143
  episode_media: {}
  episode_reward_max: 10.920000000000012
  episode_reward_mean: 3.275625000000008
  episode_reward_min: -1.830000000000001
  episodes_this_iter: 112
  episodes_total: 26445
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.348641552273025
          entropy_coeff: 0.01
          kl: 0.01424710801949066
          policy_loss: -0.07917197962474619
          total_loss: 0.0608507546588269
          vf_explained_var: 0.9259290099143982
          vf_loss: 0.13105245629070789
    num_agent_steps_sampled: 2439024
    num_agent_steps_trained: 2439024
    num_steps_sampled: 2439024
    num_steps_trained: 243902

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,244,37093.3,2439024,3.27563,10.92,-1.83,89.3661




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2449020
  custom_metrics: {}
  date: 2021-11-08_00-34-31
  done: false
  episode_len_mean: 89.54464285714286
  episode_media: {}
  episode_reward_max: 12.570000000000016
  episode_reward_mean: 3.546875000000008
  episode_reward_min: -1.589999999999997
  episodes_this_iter: 112
  episodes_total: 26557
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3575934966405234
          entropy_coeff: 0.01
          kl: 0.01408624194781229
          policy_loss: -0.07879312064530503
          total_loss: 0.06505100436062894
          vf_explained_var: 0.9357138276100159
          vf_loss: 0.13532983924970668
    num_agent_steps_sampled: 2449020
    num_agent_steps_trained: 2449020
    num_steps_sampled: 2449020
    num_steps_trained: 2449

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,245,37253.1,2449020,3.54688,12.57,-1.59,89.5446




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2459016
  custom_metrics: {}
  date: 2021-11-08_00-37-35
  done: false
  episode_len_mean: 89.33928571428571
  episode_media: {}
  episode_reward_max: 9.040000000000012
  episode_reward_mean: 3.25910714285715
  episode_reward_min: -1.7300000000000006
  episodes_this_iter: 112
  episodes_total: 26669
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.362684541889745
          entropy_coeff: 0.01
          kl: 0.013514046320907087
          policy_loss: -0.07995144757322777
          total_loss: 0.05054726894053384
          vf_explained_var: 0.932756781578064
          vf_loss: 0.12333887472716916
    num_agent_steps_sampled: 2459016
    num_agent_steps_trained: 2459016
    num_steps_sampled: 2459016
    num_steps_trained: 245901

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,246,37437,2459016,3.25911,9.04,-1.73,89.3393




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2469012
  custom_metrics: {}
  date: 2021-11-08_00-40-18
  done: false
  episode_len_mean: 89.16964285714286
  episode_media: {}
  episode_reward_max: 10.820000000000013
  episode_reward_mean: 3.2065178571428636
  episode_reward_min: -1.9200000000000008
  episodes_this_iter: 112
  episodes_total: 26781
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3528144559289657
          entropy_coeff: 0.01
          kl: 0.013422110053437266
          policy_loss: -0.07871367628288128
          total_loss: 0.06638657708899078
          vf_explained_var: 0.9108462333679199
          vf_loss: 0.13805115392160977
    num_agent_steps_sampled: 2469012
    num_agent_steps_trained: 2469012
    num_steps_sampled: 2469012
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,247,37599.9,2469012,3.20652,10.82,-1.92,89.1696


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2479008
  custom_metrics: {}
  date: 2021-11-08_00-42-39
  done: false
  episode_len_mean: 90.09909909909909
  episode_media: {}
  episode_reward_max: 10.460000000000017
  episode_reward_mean: 3.5922522522522606
  episode_reward_min: -1.3600000000000008
  episodes_this_iter: 111
  episodes_total: 26892
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3450595969827766
          entropy_coeff: 0.01
          kl: 0.013501083833265176
          policy_loss: -0.07931246595600476
          total_loss: 0.06485062800905006
          vf_explained_var: 0.9258711934089661
          vf_loss: 0.13685653187637017
    num_agent_steps_sampled: 2479008
    num_agent_steps_trained: 2479008
    num_steps_sampled: 2479008
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,248,37741.4,2479008,3.59225,10.46,-1.36,90.0991




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2489004
  custom_metrics: {}
  date: 2021-11-08_00-45-15
  done: false
  episode_len_mean: 88.4424778761062
  episode_media: {}
  episode_reward_max: 10.610000000000017
  episode_reward_mean: 3.2928318584070873
  episode_reward_min: -1.5300000000000005
  episodes_this_iter: 113
  episodes_total: 27005
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3393336340912385
          entropy_coeff: 0.01
          kl: 0.01295707716910926
          policy_loss: -0.08188570034491201
          total_loss: 0.03556880132159871
          vf_explained_var: 0.9380015134811401
          vf_loss: 0.11132999621172492
    num_agent_steps_sampled: 2489004
    num_agent_steps_trained: 2489004
    num_steps_sampled: 2489004
    num_steps_trained: 248

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,249,37897,2489004,3.29283,10.61,-1.53,88.4425




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2499000
  custom_metrics: {}
  date: 2021-11-08_00-48-07
  done: false
  episode_len_mean: 88.41071428571429
  episode_media: {}
  episode_reward_max: 10.370000000000017
  episode_reward_mean: 2.9547321428571505
  episode_reward_min: -1.6800000000000006
  episodes_this_iter: 112
  episodes_total: 27117
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.353748286483634
          entropy_coeff: 0.01
          kl: 0.013120730145106197
          policy_loss: -0.07741196746818531
          total_loss: 0.060774256072492684
          vf_explained_var: 0.928286612033844
          vf_loss: 0.13183304283162978
    num_agent_steps_sampled: 2499000
    num_agent_steps_trained: 2499000
    num_steps_sampled: 2499000
    num_steps_trained: 24

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,250,38068.8,2499000,2.95473,10.37,-1.68,88.4107


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2508996
  custom_metrics: {}
  date: 2021-11-08_00-50-29
  done: false
  episode_len_mean: 90.29464285714286
  episode_media: {}
  episode_reward_max: 12.890000000000015
  episode_reward_mean: 3.3594642857142945
  episode_reward_min: -1.6500000000000008
  episodes_this_iter: 112
  episodes_total: 27229
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3500593311766274
          entropy_coeff: 0.01
          kl: 0.013375323385929994
          policy_loss: -0.07903031372489074
          total_loss: 0.047069967907463386
          vf_explained_var: 0.936002790927887
          vf_loss: 0.11913021498192579
    num_agent_steps_sampled: 2508996
    num_agent_steps_trained: 2508996
    num_steps_sampled: 2508996
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,251,38210.7,2508996,3.35946,12.89,-1.65,90.2946




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2518992
  custom_metrics: {}
  date: 2021-11-08_00-53-07
  done: false
  episode_len_mean: 88.57142857142857
  episode_media: {}
  episode_reward_max: 12.780000000000015
  episode_reward_mean: 2.970000000000007
  episode_reward_min: -2.0600000000000005
  episodes_this_iter: 112
  episodes_total: 27341
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3383243701396843
          entropy_coeff: 0.01
          kl: 0.013404003164280821
          policy_loss: -0.07970200954403123
          total_loss: 0.0563889261526175
          vf_explained_var: 0.9154801964759827
          vf_loss: 0.12893818378066405
    num_agent_steps_sampled: 2518992
    num_agent_steps_trained: 2518992
    num_steps_sampled: 2518992
    num_steps_trained: 251

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,252,38369.2,2518992,2.97,12.78,-2.06,88.5714




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2528988
  custom_metrics: {}
  date: 2021-11-08_00-55-44
  done: false
  episode_len_mean: 88.19469026548673
  episode_media: {}
  episode_reward_max: 12.110000000000017
  episode_reward_mean: 3.3274336283185915
  episode_reward_min: -1.7100000000000006
  episodes_this_iter: 113
  episodes_total: 27454
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.348838087839958
          entropy_coeff: 0.01
          kl: 0.013577138966144417
          policy_loss: -0.08062504519286573
          total_loss: 0.06345655775159342
          vf_explained_var: 0.9250807762145996
          vf_loss: 0.13663956318369025
    num_agent_steps_sampled: 2528988
    num_agent_steps_trained: 2528988
    num_steps_sampled: 2528988
    num_steps_trained: 25

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,253,38525.6,2528988,3.32743,12.11,-1.71,88.1947




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2538984
  custom_metrics: {}
  date: 2021-11-08_00-58-17
  done: false
  episode_len_mean: 89.54867256637168
  episode_media: {}
  episode_reward_max: 12.820000000000014
  episode_reward_mean: 3.438407079646025
  episode_reward_min: -1.680000000000001
  episodes_this_iter: 113
  episodes_total: 27567
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3399653864721968
          entropy_coeff: 0.01
          kl: 0.014361058626608646
          policy_loss: -0.08000292105711679
          total_loss: 0.05655276878681193
          vf_explained_var: 0.931330144405365
          vf_loss: 0.12723905466910865
    num_agent_steps_sampled: 2538984
    num_agent_steps_trained: 2538984
    num_steps_sampled: 2538984
    num_steps_trained: 2538

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,254,38679.2,2538984,3.43841,12.82,-1.68,89.5487




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2548980
  custom_metrics: {}
  date: 2021-11-08_01-00-48
  done: false
  episode_len_mean: 90.91743119266054
  episode_media: {}
  episode_reward_max: 12.77000000000001
  episode_reward_mean: 3.39366972477065
  episode_reward_min: -1.5800000000000007
  episodes_this_iter: 109
  episodes_total: 27676
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.344546906560914
          entropy_coeff: 0.01
          kl: 0.01332099729446033
          policy_loss: -0.07787542131164263
          total_loss: 0.04258472363217774
          vf_explained_var: 0.9422920346260071
          vf_loss: 0.11355871740712696
    num_agent_steps_sampled: 2548980
    num_agent_steps_trained: 2548980
    num_steps_sampled: 2548980
    num_steps_trained: 254898

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,255,38830.2,2548980,3.39367,12.77,-1.58,90.9174




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2558976
  custom_metrics: {}
  date: 2021-11-08_01-03-38
  done: false
  episode_len_mean: 87.7304347826087
  episode_media: {}
  episode_reward_max: 10.640000000000013
  episode_reward_mean: 3.3660869565217473
  episode_reward_min: -1.6899999999999995
  episodes_this_iter: 115
  episodes_total: 27791
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3306805573976956
          entropy_coeff: 0.01
          kl: 0.013272419148033314
          policy_loss: -0.07972523186785671
          total_loss: 0.06013835148854006
          vf_explained_var: 0.9273483157157898
          vf_loss: 0.13293415853817367
    num_agent_steps_sampled: 2558976
    num_agent_steps_trained: 2558976
    num_steps_sampled: 2558976
    num_steps_trained: 25

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,256,38999.9,2558976,3.36609,10.64,-1.69,87.7304


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2568972
  custom_metrics: {}
  date: 2021-11-08_01-05-58
  done: false
  episode_len_mean: 90.32727272727273
  episode_media: {}
  episode_reward_max: 17.82999999999999
  episode_reward_mean: 3.816909090909099
  episode_reward_min: -1.890000000000001
  episodes_this_iter: 110
  episodes_total: 27901
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.35510842942784
          entropy_coeff: 0.01
          kl: 0.013143986099123296
          policy_loss: -0.07819413240266661
          total_loss: 0.05457863764534903
          vf_explained_var: 0.9404085874557495
          vf_loss: 0.12638020970038752
    num_agent_steps_sampled: 2568972
    num_agent_steps_trained: 2568972
    num_steps_sampled: 2568972
    num_steps_trained: 256897

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,257,39139.4,2568972,3.81691,17.83,-1.89,90.3273




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2578968
  custom_metrics: {}
  date: 2021-11-08_01-08-30
  done: false
  episode_len_mean: 88.70796460176992
  episode_media: {}
  episode_reward_max: 12.610000000000015
  episode_reward_mean: 3.1307964601769975
  episode_reward_min: -1.770000000000001
  episodes_this_iter: 113
  episodes_total: 28014
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3300176290365364
          entropy_coeff: 0.01
          kl: 0.012979053469094066
          policy_loss: -0.07869649688657532
          total_loss: 0.044675831229258806
          vf_explained_var: 0.9439103603363037
          vf_loss: 0.11710459750432234
    num_agent_steps_sampled: 2578968
    num_agent_steps_trained: 2578968
    num_steps_sampled: 2578968
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,258,39292.2,2578968,3.1308,12.61,-1.77,88.708




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2588964
  custom_metrics: {}
  date: 2021-11-08_01-11-22
  done: false
  episode_len_mean: 86.65217391304348
  episode_media: {}
  episode_reward_max: 12.670000000000018
  episode_reward_mean: 3.7263478260869647
  episode_reward_min: -1.830000000000001
  episodes_this_iter: 115
  episodes_total: 28129
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3342390223446055
          entropy_coeff: 0.01
          kl: 0.014677873852633024
          policy_loss: -0.07775660125681987
          total_loss: 0.08915218182672292
          vf_explained_var: 0.9308323264122009
          vf_loss: 0.1568131406019386
    num_agent_steps_sampled: 2588964
    num_agent_steps_trained: 2588964
    num_steps_sampled: 2588964
    num_steps_trained: 258

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,259,39463.2,2588964,3.72635,12.67,-1.83,86.6522


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2598960
  custom_metrics: {}
  date: 2021-11-08_01-13-46
  done: false
  episode_len_mean: 88.91071428571429
  episode_media: {}
  episode_reward_max: 12.320000000000013
  episode_reward_mean: 3.4479464285714365
  episode_reward_min: -1.6600000000000008
  episodes_this_iter: 112
  episodes_total: 28241
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.334736793061607
          entropy_coeff: 0.01
          kl: 0.013348438191310414
          policy_loss: -0.07509671387971084
          total_loss: 0.08247017533335294
          vf_explained_var: 0.9294275045394897
          vf_loss: 0.15050484566097586
    num_agent_steps_sampled: 2598960
    num_agent_steps_trained: 2598960
    num_steps_sampled: 2598960
    num_steps_trained: 25

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,260,39608,2598960,3.44795,12.32,-1.66,88.9107




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2608956
  custom_metrics: {}
  date: 2021-11-08_01-16-31
  done: false
  episode_len_mean: 87.66956521739131
  episode_media: {}
  episode_reward_max: 11.200000000000012
  episode_reward_mean: 3.2908695652174
  episode_reward_min: -2.18
  episodes_this_iter: 115
  episodes_total: 28356
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3355947649377025
          entropy_coeff: 0.01
          kl: 0.012793875829869277
          policy_loss: -0.07626374440633843
          total_loss: 0.06174295794011818
          vf_explained_var: 0.9361783266067505
          vf_loss: 0.1322166012568224
    num_agent_steps_sampled: 2608956
    num_agent_steps_trained: 2608956
    num_steps_sampled: 2608956
    num_steps_trained: 2608956
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,261,39772.8,2608956,3.29087,11.2,-2.18,87.6696




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2618952
  custom_metrics: {}
  date: 2021-11-08_01-19-13
  done: false
  episode_len_mean: 89.88288288288288
  episode_media: {}
  episode_reward_max: 13.010000000000014
  episode_reward_mean: 3.5694594594594675
  episode_reward_min: -1.4800000000000006
  episodes_this_iter: 111
  episodes_total: 28467
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3434153395840247
          entropy_coeff: 0.01
          kl: 0.014035957644635203
          policy_loss: -0.07705185031279539
          total_loss: 0.0730914755951231
          vf_explained_var: 0.9307616949081421
          vf_loss: 0.1416018136912304
    num_agent_steps_sampled: 2618952
    num_agent_steps_trained: 2618952
    num_steps_sampled: 2618952
    num_steps_trained: 261

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,262,39934.8,2618952,3.56946,13.01,-1.48,89.8829




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2628948
  custom_metrics: {}
  date: 2021-11-08_01-21-47
  done: false
  episode_len_mean: 88.67857142857143
  episode_media: {}
  episode_reward_max: 14.980000000000015
  episode_reward_mean: 3.7261607142857227
  episode_reward_min: -1.7400000000000009
  episodes_this_iter: 112
  episodes_total: 28579
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3369773561119014
          entropy_coeff: 0.01
          kl: 0.013376007629009059
          policy_loss: -0.0762607682074428
          total_loss: 0.05738064178830793
          vf_explained_var: 0.9419529438018799
          vf_loss: 0.12653896452652083
    num_agent_steps_sampled: 2628948
    num_agent_steps_trained: 2628948
    num_steps_sampled: 2628948
    num_steps_trained: 26

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,263,40088.4,2628948,3.72616,14.98,-1.74,88.6786




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2638944
  custom_metrics: {}
  date: 2021-11-08_01-24-43
  done: false
  episode_len_mean: 89.32743362831859
  episode_media: {}
  episode_reward_max: 10.920000000000014
  episode_reward_mean: 3.456371681415937
  episode_reward_min: -2.0999999999999988
  episodes_this_iter: 113
  episodes_total: 28692
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.34367320130014
          entropy_coeff: 0.01
          kl: 0.013513748302436613
          policy_loss: -0.07806098547994963
          total_loss: 0.07385659182730775
          vf_explained_var: 0.9373729228973389
          vf_loss: 0.14456830100817047
    num_agent_steps_sampled: 2638944
    num_agent_steps_trained: 2638944
    num_steps_sampled: 2638944
    num_steps_trained: 2638

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,264,40264.2,2638944,3.45637,10.92,-2.1,89.3274




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2648940
  custom_metrics: {}
  date: 2021-11-08_01-27-32
  done: false
  episode_len_mean: 88.39285714285714
  episode_media: {}
  episode_reward_max: 10.920000000000012
  episode_reward_mean: 3.604642857142865
  episode_reward_min: -1.7100000000000009
  episodes_this_iter: 112
  episodes_total: 28804
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3236286020686485
          entropy_coeff: 0.01
          kl: 0.01295968227417844
          policy_loss: -0.07880269026335998
          total_loss: 0.04831588962553149
          vf_explained_var: 0.9447145462036133
          vf_loss: 0.1208310880817664
    num_agent_steps_sampled: 2648940
    num_agent_steps_trained: 2648940
    num_steps_sampled: 2648940
    num_steps_trained: 2648

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,265,40433.5,2648940,3.60464,10.92,-1.71,88.3929


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2658936
  custom_metrics: {}
  date: 2021-11-08_01-30-00
  done: false
  episode_len_mean: 90.21621621621621
  episode_media: {}
  episode_reward_max: 12.930000000000014
  episode_reward_mean: 3.3099099099099174
  episode_reward_min: -1.3800000000000003
  episodes_this_iter: 111
  episodes_total: 28915
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3646230025169177
          entropy_coeff: 0.01
          kl: 0.012928427927730121
          policy_loss: -0.07980219410875669
          total_loss: 0.04974684279778192
          vf_explained_var: 0.9299294352531433
          vf_loss: 0.12374269170447802
    num_agent_steps_sampled: 2658936
    num_agent_steps_trained: 2658936
    num_steps_sampled: 2658936
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,266,40581.5,2658936,3.30991,12.93,-1.38,90.2162




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2668932
  custom_metrics: {}
  date: 2021-11-08_01-32-43
  done: false
  episode_len_mean: 89.61261261261261
  episode_media: {}
  episode_reward_max: 18.689999999999973
  episode_reward_mean: 3.3919819819819894
  episode_reward_min: -1.8600000000000008
  episodes_this_iter: 111
  episodes_total: 29026
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.35138480459523
          entropy_coeff: 0.01
          kl: 0.014261683336800206
          policy_loss: -0.07360491472590938
          total_loss: 0.07496083345200516
          vf_explained_var: 0.9378887414932251
          vf_loss: 0.13958969846861358
    num_agent_steps_sampled: 2668932
    num_agent_steps_trained: 2668932
    num_steps_sampled: 2668932
    num_steps_trained: 266

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,267,40744,2668932,3.39198,18.69,-1.86,89.6126




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2678928
  custom_metrics: {}
  date: 2021-11-08_01-35-22
  done: false
  episode_len_mean: 87.19827586206897
  episode_media: {}
  episode_reward_max: 10.660000000000014
  episode_reward_mean: 3.0040517241379376
  episode_reward_min: -1.9000000000000008
  episodes_this_iter: 116
  episodes_total: 29142
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3430225237821922
          entropy_coeff: 0.01
          kl: 0.013015928661369814
          policy_loss: -0.07670604099845911
          total_loss: 0.05982554777183084
          vf_explained_var: 0.9280214905738831
          vf_loss: 0.13030990052840905
    num_agent_steps_sampled: 2678928
    num_agent_steps_trained: 2678928
    num_steps_sampled: 2678928
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,268,40903.2,2678928,3.00405,10.66,-1.9,87.1983


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2688924
  custom_metrics: {}
  date: 2021-11-08_01-37-46
  done: false
  episode_len_mean: 88.72321428571429
  episode_media: {}
  episode_reward_max: 10.980000000000011
  episode_reward_mean: 3.4491071428571507
  episode_reward_min: -1.1800000000000004
  episodes_this_iter: 112
  episodes_total: 29254
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3394033409591413
          entropy_coeff: 0.01
          kl: 0.013551301082980752
          policy_loss: -0.0814849111259493
          total_loss: 0.0622666869376205
          vf_explained_var: 0.9208278059959412
          vf_loss: 0.1362740721497844
    num_agent_steps_sampled: 2688924
    num_agent_steps_trained: 2688924
    num_steps_sampled: 2688924
    num_steps_trained: 2688

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,269,41047.4,2688924,3.44911,10.98,-1.18,88.7232




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2698920
  custom_metrics: {}
  date: 2021-11-08_01-40-28
  done: false
  episode_len_mean: 88.6875
  episode_media: {}
  episode_reward_max: 14.55000000000001
  episode_reward_mean: 3.6522321428571516
  episode_reward_min: -1.4400000000000006
  episodes_this_iter: 112
  episodes_total: 29366
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3271913061794054
          entropy_coeff: 0.01
          kl: 0.013284144643602964
          policy_loss: -0.07972003694177948
          total_loss: 0.052993119036794725
          vf_explained_var: 0.9310900568962097
          vf_loss: 0.12572212645975062
    num_agent_steps_sampled: 2698920
    num_agent_steps_trained: 2698920
    num_steps_sampled: 2698920
    num_steps_trained: 2698920
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,270,41209,2698920,3.65223,14.55,-1.44,88.6875




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2708916
  custom_metrics: {}
  date: 2021-11-08_01-43-04
  done: false
  episode_len_mean: 88.30434782608695
  episode_media: {}
  episode_reward_max: 13.050000000000013
  episode_reward_mean: 3.491304347826095
  episode_reward_min: -2.0699999999999994
  episodes_this_iter: 115
  episodes_total: 29481
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.332050629151173
          entropy_coeff: 0.01
          kl: 0.014084952367522673
          policy_loss: -0.0777442912833813
          total_loss: 0.06753177712074457
          vf_explained_var: 0.910218358039856
          vf_loss: 0.13650929138112144
    num_agent_steps_sampled: 2708916
    num_agent_steps_trained: 2708916
    num_steps_sampled: 2708916
    num_steps_trained: 27089

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,271,41365.3,2708916,3.4913,13.05,-2.07,88.3043




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2718912
  custom_metrics: {}
  date: 2021-11-08_01-45-38
  done: false
  episode_len_mean: 89.16071428571429
  episode_media: {}
  episode_reward_max: 9.28000000000001
  episode_reward_mean: 3.1620535714285785
  episode_reward_min: -1.8500000000000008
  episodes_this_iter: 112
  episodes_total: 29593
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3438013086971057
          entropy_coeff: 0.01
          kl: 0.01347845577969753
          policy_loss: -0.07913110814988614
          total_loss: 0.06234211160076989
          vf_explained_var: 0.9254655241966248
          vf_loss: 0.13420562431948563
    num_agent_steps_sampled: 2718912
    num_agent_steps_trained: 2718912
    num_steps_sampled: 2718912
    num_steps_trained: 2718

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,272,41519.2,2718912,3.16205,9.28,-1.85,89.1607




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2728908
  custom_metrics: {}
  date: 2021-11-08_01-48-13
  done: false
  episode_len_mean: 89.72072072072072
  episode_media: {}
  episode_reward_max: 12.560000000000013
  episode_reward_mean: 3.909369369369378
  episode_reward_min: -1.790000000000001
  episodes_this_iter: 111
  episodes_total: 29704
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3189012011911116
          entropy_coeff: 0.01
          kl: 0.013237689126356589
          policy_loss: -0.0742702613465297
          total_loss: 0.0713982396846653
          vf_explained_var: 0.9430211186408997
          vf_loss: 0.13870040316325732
    num_agent_steps_sampled: 2728908
    num_agent_steps_trained: 2728908
    num_steps_sampled: 2728908
    num_steps_trained: 27289

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,273,41674.4,2728908,3.90937,12.56,-1.79,89.7207


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2738904
  custom_metrics: {}
  date: 2021-11-08_01-50-39
  done: false
  episode_len_mean: 90.13636363636364
  episode_media: {}
  episode_reward_max: 14.810000000000013
  episode_reward_mean: 3.8169090909090992
  episode_reward_min: -1.6000000000000005
  episodes_this_iter: 110
  episodes_total: 29814
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.320848163376507
          entropy_coeff: 0.01
          kl: 0.013410258156379931
          policy_loss: -0.07993370618384618
          total_loss: 0.05444901380369551
          vf_explained_var: 0.9301467537879944
          vf_loss: 0.12704095710387342
    num_agent_steps_sampled: 2738904
    num_agent_steps_trained: 2738904
    num_steps_sampled: 2738904
    num_steps_trained: 27

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,274,41820.5,2738904,3.81691,14.81,-1.6,90.1364




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2748900
  custom_metrics: {}
  date: 2021-11-08_01-53-17
  done: false
  episode_len_mean: 90.71818181818182
  episode_media: {}
  episode_reward_max: 14.760000000000016
  episode_reward_mean: 3.3574545454545537
  episode_reward_min: -1.850000000000001
  episodes_this_iter: 110
  episodes_total: 29924
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3349648917842116
          entropy_coeff: 0.01
          kl: 0.013527185042516595
          policy_loss: -0.077975686518555
          total_loss: 0.05163266173420617
          vf_explained_var: 0.9400271773338318
          vf_loss: 0.12214137855996815
    num_agent_steps_sampled: 2748900
    num_agent_steps_trained: 2748900
    num_steps_sampled: 2748900
    num_steps_trained: 2748

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,275,41977.9,2748900,3.35745,14.76,-1.85,90.7182




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2758896
  custom_metrics: {}
  date: 2021-11-08_01-56-16
  done: false
  episode_len_mean: 87.84210526315789
  episode_media: {}
  episode_reward_max: 13.040000000000013
  episode_reward_mean: 3.5164035087719374
  episode_reward_min: -1.5300000000000005
  episodes_this_iter: 114
  episodes_total: 30038
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3224769284582547
          entropy_coeff: 0.01
          kl: 0.014328083114676371
          policy_loss: -0.07114215790429432
          total_loss: 0.08568321576931029
          vf_explained_var: 0.9231038689613342
          vf_loss: 0.14740897753697813
    num_agent_steps_sampled: 2758896
    num_agent_steps_trained: 2758896
    num_steps_sampled: 2758896
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,276,42157.4,2758896,3.5164,13.04,-1.53,87.8421




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2768892
  custom_metrics: {}
  date: 2021-11-08_01-58-52
  done: false
  episode_len_mean: 89.65178571428571
  episode_media: {}
  episode_reward_max: 10.99000000000001
  episode_reward_mean: 3.1990178571428642
  episode_reward_min: -1.4500000000000004
  episodes_this_iter: 112
  episodes_total: 30150
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.33144281501444
          entropy_coeff: 0.01
          kl: 0.013174911522529746
          policy_loss: -0.07667715726181483
          total_loss: 0.06899293252290824
          vf_explained_var: 0.9229164719581604
          vf_loss: 0.13897042182616443
    num_agent_steps_sampled: 2768892
    num_agent_steps_trained: 2768892
    num_steps_sampled: 2768892
    num_steps_trained: 2768

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,277,42313.3,2768892,3.19902,10.99,-1.45,89.6518




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2778888
  custom_metrics: {}
  date: 2021-11-08_02-02-01
  done: false
  episode_len_mean: 91.55045871559633
  episode_media: {}
  episode_reward_max: 12.560000000000015
  episode_reward_mean: 3.9258715596330367
  episode_reward_min: -1.470000000000001
  episodes_this_iter: 109
  episodes_total: 30259
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3032152966556385
          entropy_coeff: 0.01
          kl: 0.013659935739768566
          policy_loss: -0.07859779928739254
          total_loss: 0.06456980355497864
          vf_explained_var: 0.9392614364624023
          vf_loss: 0.13508071293815588
    num_agent_steps_sampled: 2778888
    num_agent_steps_trained: 2778888
    num_steps_sampled: 2778888
    num_steps_trained: 27

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,278,42501.5,2778888,3.92587,12.56,-1.47,91.5505


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2788884
  custom_metrics: {}
  date: 2021-11-08_02-04-22
  done: false
  episode_len_mean: 91.32727272727273
  episode_media: {}
  episode_reward_max: 12.570000000000014
  episode_reward_mean: 3.0541818181818257
  episode_reward_min: -2.0999999999999988
  episodes_this_iter: 110
  episodes_total: 30369
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.314923475135086
          entropy_coeff: 0.01
          kl: 0.013589715104510238
          policy_loss: -0.07763693289688
          total_loss: 0.05993893175011771
          vf_explained_var: 0.9229065179824829
          vf_loss: 0.12976603011529034
    num_agent_steps_sampled: 2788884
    num_agent_steps_trained: 2788884
    num_steps_sampled: 2788884
    num_steps_trained: 27888

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,279,42643,2788884,3.05418,12.57,-2.1,91.3273




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2798880
  custom_metrics: {}
  date: 2021-11-08_02-07-06
  done: false
  episode_len_mean: 90.23636363636363
  episode_media: {}
  episode_reward_max: 13.190000000000014
  episode_reward_mean: 3.3672727272727347
  episode_reward_min: -1.4900000000000007
  episodes_this_iter: 110
  episodes_total: 30479
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3208967991364307
          entropy_coeff: 0.01
          kl: 0.013871348219527938
          policy_loss: -0.07680904206175071
          total_loss: 0.07836487381997653
          vf_explained_var: 0.9212888479232788
          vf_loss: 0.14678221771764194
    num_agent_steps_sampled: 2798880
    num_agent_steps_trained: 2798880
    num_steps_sampled: 2798880
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,280,42806.4,2798880,3.36727,13.19,-1.49,90.2364




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2808876
  custom_metrics: {}
  date: 2021-11-08_02-09-50
  done: false
  episode_len_mean: 90.71171171171171
  episode_media: {}
  episode_reward_max: 12.810000000000015
  episode_reward_mean: 3.489369369369378
  episode_reward_min: -1.7700000000000007
  episodes_this_iter: 111
  episodes_total: 30590
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.32667614985735
          entropy_coeff: 0.01
          kl: 0.012997319046317426
          policy_loss: -0.08106255146682771
          total_loss: 0.03797994579833287
          vf_explained_var: 0.9397525787353516
          vf_loss: 0.11269973964693072
    num_agent_steps_sampled: 2808876
    num_agent_steps_trained: 2808876
    num_steps_sampled: 2808876
    num_steps_trained: 2808

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,281,42971.1,2808876,3.48937,12.81,-1.77,90.7117


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2818872
  custom_metrics: {}
  date: 2021-11-08_02-12-11
  done: false
  episode_len_mean: 91.4074074074074
  episode_media: {}
  episode_reward_max: 10.780000000000017
  episode_reward_mean: 3.566388888888898
  episode_reward_min: -1.9700000000000009
  episodes_this_iter: 108
  episodes_total: 30698
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.319905108264369
          entropy_coeff: 0.01
          kl: 0.014076506946302103
          policy_loss: -0.07935386745768608
          total_loss: 0.052155013719939775
          vf_explained_var: 0.9365021586418152
          vf_loss: 0.12263988929633529
    num_agent_steps_sampled: 2818872
    num_agent_steps_trained: 2818872
    num_steps_sampled: 2818872
    num_steps_trained: 281

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,282,43111.6,2818872,3.56639,10.78,-1.97,91.4074




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2828868
  custom_metrics: {}
  date: 2021-11-08_02-14-41
  done: false
  episode_len_mean: 91.60909090909091
  episode_media: {}
  episode_reward_max: 12.930000000000016
  episode_reward_mean: 3.757454545454554
  episode_reward_min: -1.780000000000001
  episodes_this_iter: 110
  episodes_total: 30808
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.291002618553292
          entropy_coeff: 0.01
          kl: 0.01380849179580544
          policy_loss: -0.0766258130948513
          total_loss: 0.07293860380314927
          vf_explained_var: 0.9327720403671265
          vf_loss: 0.1410169718747274
    num_agent_steps_sampled: 2828868
    num_agent_steps_trained: 2828868
    num_steps_sampled: 2828868
    num_steps_trained: 2828868

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,283,43261.8,2828868,3.75745,12.93,-1.78,91.6091




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2838864
  custom_metrics: {}
  date: 2021-11-08_02-17-43
  done: false
  episode_len_mean: 88.92035398230088
  episode_media: {}
  episode_reward_max: 14.050000000000015
  episode_reward_mean: 3.786902654867265
  episode_reward_min: -1.6300000000000008
  episodes_this_iter: 113
  episodes_total: 30921
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3019890648686987
          entropy_coeff: 0.01
          kl: 0.01395030385030329
          policy_loss: -0.07778726247194995
          total_loss: 0.0702640461600107
          vf_explained_var: 0.9425718784332275
          vf_loss: 0.13929066244894878
    num_agent_steps_sampled: 2838864
    num_agent_steps_trained: 2838864
    num_steps_sampled: 2838864
    num_steps_trained: 2838

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,284,43443.4,2838864,3.7869,14.05,-1.63,88.9204


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2848860
  custom_metrics: {}
  date: 2021-11-08_02-20-03
  done: false
  episode_len_mean: 90.9090909090909
  episode_media: {}
  episode_reward_max: 12.450000000000014
  episode_reward_mean: 3.0096363636363708
  episode_reward_min: -1.5100000000000005
  episodes_this_iter: 110
  episodes_total: 31031
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.340269427625542
          entropy_coeff: 0.01
          kl: 0.01300035808255347
          policy_loss: -0.0803151280603284
          total_loss: 0.04763845411949178
          vf_explained_var: 0.9245647192001343
          vf_loss: 0.12173983491320386
    num_agent_steps_sampled: 2848860
    num_agent_steps_trained: 2848860
    num_steps_sampled: 2848860
    num_steps_trained: 28488

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,285,43584,2848860,3.00964,12.45,-1.51,90.9091




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2858856
  custom_metrics: {}
  date: 2021-11-08_02-23-12
  done: false
  episode_len_mean: 91.40366972477064
  episode_media: {}
  episode_reward_max: 13.130000000000011
  episode_reward_mean: 4.128440366972486
  episode_reward_min: -1.6900000000000008
  episodes_this_iter: 109
  episodes_total: 31140
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3085732682138427
          entropy_coeff: 0.01
          kl: 0.015089328728082198
          policy_loss: -0.07131462623643824
          total_loss: 0.10336641076410938
          vf_explained_var: 0.8873404860496521
          vf_loss: 0.16339139203803663
    num_agent_steps_sampled: 2858856
    num_agent_steps_trained: 2858856
    num_steps_sampled: 2858856
    num_steps_trained: 28

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,286,43772.3,2858856,4.12844,13.13,-1.69,91.4037




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2868852
  custom_metrics: {}
  date: 2021-11-08_02-25-45
  done: false
  episode_len_mean: 91.20183486238533
  episode_media: {}
  episode_reward_max: 12.830000000000013
  episode_reward_mean: 3.5050458715596404
  episode_reward_min: -1.5400000000000007
  episodes_this_iter: 109
  episodes_total: 31249
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3221048147250443
          entropy_coeff: 0.01
          kl: 0.013948505794442251
          policy_loss: -0.07437153935400594
          total_loss: 0.08954332673874421
          vf_explained_var: 0.9224669933319092
          vf_loss: 0.1553594740258896
    num_agent_steps_sampled: 2868852
    num_agent_steps_trained: 2868852
    num_steps_sampled: 2868852
    num_steps_trained: 28

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,287,43925.8,2868852,3.50505,12.83,-1.54,91.2018




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2878848
  custom_metrics: {}
  date: 2021-11-08_02-28-37
  done: false
  episode_len_mean: 92.70642201834862
  episode_media: {}
  episode_reward_max: 12.530000000000017
  episode_reward_mean: 3.0774311926605575
  episode_reward_min: -1.9500000000000008
  episodes_this_iter: 109
  episodes_total: 31358
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.340431830006787
          entropy_coeff: 0.01
          kl: 0.014320166693772398
          policy_loss: -0.07534126649554979
          total_loss: 0.0835668415734624
          vf_explained_var: 0.9094942808151245
          vf_loss: 0.1496892956762105
    num_agent_steps_sampled: 2878848
    num_agent_steps_trained: 2878848
    num_steps_sampled: 2878848
    num_steps_trained: 2878

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,288,44097.6,2878848,3.07743,12.53,-1.95,92.7064




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2888844
  custom_metrics: {}
  date: 2021-11-08_02-31-16
  done: false
  episode_len_mean: 91.22018348623853
  episode_media: {}
  episode_reward_max: 14.200000000000014
  episode_reward_mean: 3.9807339449541366
  episode_reward_min: -1.840000000000001
  episodes_this_iter: 109
  episodes_total: 31467
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.30450394092462
          entropy_coeff: 0.01
          kl: 0.014032927468433834
          policy_loss: -0.07877176730997032
          total_loss: 0.07283785111692725
          vf_explained_var: 0.9283273816108704
          vf_loss: 0.1426858951807277
    num_agent_steps_sampled: 2888844
    num_agent_steps_trained: 2888844
    num_steps_sampled: 2888844
    num_steps_trained: 28888

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,289,44256,2888844,3.98073,14.2,-1.84,91.2202




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2898840
  custom_metrics: {}
  date: 2021-11-08_02-34-01
  done: false
  episode_len_mean: 90.06363636363636
  episode_media: {}
  episode_reward_max: 10.880000000000011
  episode_reward_mean: 3.1631818181818243
  episode_reward_min: -1.2799999999999998
  episodes_this_iter: 110
  episodes_total: 31577
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.32152987728771
          entropy_coeff: 0.01
          kl: 0.012838104857929569
          policy_loss: -0.0775048727527834
          total_loss: 0.06258308177447727
          vf_explained_var: 0.9299209713935852
          vf_loss: 0.1340564454563408
    num_agent_steps_sampled: 2898840
    num_agent_steps_trained: 2898840
    num_steps_sampled: 2898840
    num_steps_trained: 28988

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,290,44421,2898840,3.16318,10.88,-1.28,90.0636




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2908836
  custom_metrics: {}
  date: 2021-11-08_02-36-32
  done: false
  episode_len_mean: 90.74107142857143
  episode_media: {}
  episode_reward_max: 12.730000000000013
  episode_reward_mean: 3.825982142857151
  episode_reward_min: -1.0400000000000005
  episodes_this_iter: 112
  episodes_total: 31689
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3025948856630896
          entropy_coeff: 0.01
          kl: 0.014093008983669412
          policy_loss: -0.07741713155952529
          total_loss: 0.0789498971361253
          vf_explained_var: 0.9205562472343445
          vf_loss: 0.14728734085543288
    num_agent_steps_sampled: 2908836
    num_agent_steps_trained: 2908836
    num_steps_sampled: 2908836
    num_steps_trained: 290

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,291,44572.8,2908836,3.82598,12.73,-1.04,90.7411




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2918832
  custom_metrics: {}
  date: 2021-11-08_02-39-20
  done: false
  episode_len_mean: 89.73873873873873
  episode_media: {}
  episode_reward_max: 14.200000000000017
  episode_reward_mean: 3.6695495495495583
  episode_reward_min: -1.4700000000000004
  episodes_this_iter: 111
  episodes_total: 31800
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3118348327457396
          entropy_coeff: 0.01
          kl: 0.013351603444698578
          policy_loss: -0.07399737469247009
          total_loss: 0.06332457484279433
          vf_explained_var: 0.9381957054138184
          vf_loss: 0.1300236753323394
    num_agent_steps_sampled: 2918832
    num_agent_steps_trained: 2918832
    num_steps_sampled: 2918832
    num_steps_trained: 29

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,292,44740.4,2918832,3.66955,14.2,-1.47,89.7387




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2928828
  custom_metrics: {}
  date: 2021-11-08_02-41-52
  done: false
  episode_len_mean: 92.08333333333333
  episode_media: {}
  episode_reward_max: 12.850000000000016
  episode_reward_mean: 2.937870370370378
  episode_reward_min: -1.680000000000001
  episodes_this_iter: 108
  episodes_total: 31908
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.333870546838157
          entropy_coeff: 0.01
          kl: 0.012968821566463593
          policy_loss: -0.08042227674562197
          total_loss: 0.04624432006484678
          vf_explained_var: 0.9150016903877258
          vf_loss: 0.12046070441078299
    num_agent_steps_sampled: 2928828
    num_agent_steps_trained: 2928828
    num_steps_sampled: 2928828
    num_steps_trained: 2928

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,293,44892.2,2928828,2.93787,12.85,-1.68,92.0833




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2938824
  custom_metrics: {}
  date: 2021-11-08_02-44-26
  done: false
  episode_len_mean: 90.60909090909091
  episode_media: {}
  episode_reward_max: 12.980000000000015
  episode_reward_mean: 3.1244545454545527
  episode_reward_min: -1.3000000000000007
  episodes_this_iter: 110
  episodes_total: 32018
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3368676625765286
          entropy_coeff: 0.01
          kl: 0.013508710730205714
          policy_loss: -0.07794395602570894
          total_loss: 0.053616719038631674
          vf_explained_var: 0.9216579794883728
          vf_loss: 0.12415481833900269
    num_agent_steps_sampled: 2938824
    num_agent_steps_trained: 2938824
    num_steps_sampled: 2938824
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,294,45046,2938824,3.12445,12.98,-1.3,90.6091




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2948820
  custom_metrics: {}
  date: 2021-11-08_02-47-10
  done: false
  episode_len_mean: 90.56756756756756
  episode_media: {}
  episode_reward_max: 10.830000000000013
  episode_reward_mean: 3.4536936936937024
  episode_reward_min: -1.5300000000000005
  episodes_this_iter: 111
  episodes_total: 32129
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3002869964664816
          entropy_coeff: 0.01
          kl: 0.01308071714492305
          policy_loss: -0.08066574098526412
          total_loss: 0.048067369951237726
          vf_explained_var: 0.9323979020118713
          vf_loss: 0.12193647166793672
    num_agent_steps_sampled: 2948820
    num_agent_steps_trained: 2948820
    num_steps_sampled: 2948820
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,295,45210.3,2948820,3.45369,10.83,-1.53,90.5676




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2958816
  custom_metrics: {}
  date: 2021-11-08_02-49-50
  done: false
  episode_len_mean: 90.4090909090909
  episode_media: {}
  episode_reward_max: 14.860000000000015
  episode_reward_mean: 4.3267272727272825
  episode_reward_min: -1.0500000000000005
  episodes_this_iter: 110
  episodes_total: 32239
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2930141835131197
          entropy_coeff: 0.01
          kl: 0.014273285762268792
          policy_loss: -0.07641386196303826
          total_loss: 0.07475103665557173
          vf_explained_var: 0.9393163323402405
          vf_loss: 0.14157870966049596
    num_agent_steps_sampled: 2958816
    num_agent_steps_trained: 2958816
    num_steps_sampled: 2958816
    num_steps_trained: 29

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,296,45369.8,2958816,4.32673,14.86,-1.05,90.4091




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2968812
  custom_metrics: {}
  date: 2021-11-08_02-52-18
  done: false
  episode_len_mean: 92.86111111111111
  episode_media: {}
  episode_reward_max: 12.920000000000016
  episode_reward_mean: 3.587777777777786
  episode_reward_min: -1.800000000000001
  episodes_this_iter: 108
  episodes_total: 32347
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3121811245241735
          entropy_coeff: 0.01
          kl: 0.013794658056032085
          policy_loss: -0.07607949097664693
          total_loss: 0.05996192949386234
          vf_explained_var: 0.9224866032600403
          vf_loss: 0.12773727532635387
    num_agent_steps_sampled: 2968812
    num_agent_steps_trained: 2968812
    num_steps_sampled: 2968812
    num_steps_trained: 296

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,297,45518.4,2968812,3.58778,12.92,-1.8,92.8611




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2978808
  custom_metrics: {}
  date: 2021-11-08_02-54-54
  done: false
  episode_len_mean: 89.54054054054055
  episode_media: {}
  episode_reward_max: 12.810000000000013
  episode_reward_mean: 3.366216216216224
  episode_reward_min: -1.8900000000000008
  episodes_this_iter: 111
  episodes_total: 32458
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3105558118249614
          entropy_coeff: 0.01
          kl: 0.014729702139811337
          policy_loss: -0.07619124489287153
          total_loss: 0.09487720592520558
          vf_explained_var: 0.9294339418411255
          vf_loss: 0.1606179057341865
    num_agent_steps_sampled: 2978808
    num_agent_steps_trained: 2978808
    num_steps_sampled: 2978808
    num_steps_trained: 297

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,298,45673.9,2978808,3.36622,12.81,-1.89,89.5405




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2988804
  custom_metrics: {}
  date: 2021-11-08_02-57-46
  done: false
  episode_len_mean: 90.05357142857143
  episode_media: {}
  episode_reward_max: 16.94999999999999
  episode_reward_mean: 3.745446428571436
  episode_reward_min: -2.04
  episodes_this_iter: 112
  episodes_total: 32570
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.291561312145657
          entropy_coeff: 0.01
          kl: 0.01414886684614145
          policy_loss: -0.07341664472005816
          total_loss: 0.09050329676391478
          vf_explained_var: 0.9228647351264954
          vf_loss: 0.15460266643880397
    num_agent_steps_sampled: 2988804
    num_agent_steps_trained: 2988804
    num_steps_sampled: 2988804
    num_steps_trained: 2988804
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,299,45845.7,2988804,3.74545,16.95,-2.04,90.0536




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 2998800
  custom_metrics: {}
  date: 2021-11-08_03-00-54
  done: false
  episode_len_mean: 91.19266055045871
  episode_media: {}
  episode_reward_max: 12.690000000000014
  episode_reward_mean: 3.6282568807339546
  episode_reward_min: -1.6900000000000006
  episodes_this_iter: 109
  episodes_total: 32679
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.3146348794301352
          entropy_coeff: 0.01
          kl: 0.01352763806854723
          policy_loss: -0.07601152987370634
          total_loss: 0.07774508781412728
          vf_explained_var: 0.9215949773788452
          vf_loss: 0.14608531476786504
    num_agent_steps_sampled: 2998800
    num_agent_steps_trained: 2998800
    num_steps_sampled: 2998800
    num_steps_trained: 29

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,300,46034.3,2998800,3.62826,12.69,-1.69,91.1927


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3008796
  custom_metrics: {}
  date: 2021-11-08_03-03-15
  done: false
  episode_len_mean: 91.26605504587155
  episode_media: {}
  episode_reward_max: 11.150000000000013
  episode_reward_mean: 3.653119266055054
  episode_reward_min: -1.990000000000001
  episodes_this_iter: 109
  episodes_total: 32788
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.30558620538467
          entropy_coeff: 0.01
          kl: 0.013904272899989065
          policy_loss: -0.07510018111803593
          total_loss: 0.09181024745409178
          vf_explained_var: 0.9253290891647339
          vf_loss: 0.15829061745610248
    num_agent_steps_sampled: 3008796
    num_agent_steps_trained: 3008796
    num_steps_sampled: 3008796
    num_steps_trained: 30087

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,301,46174.8,3008796,3.65312,11.15,-1.99,91.2661




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3018792
  custom_metrics: {}
  date: 2021-11-08_03-05-51
  done: false
  episode_len_mean: 89.69642857142857
  episode_media: {}
  episode_reward_max: 14.480000000000016
  episode_reward_mean: 4.006160714285723
  episode_reward_min: -1.5600000000000007
  episodes_this_iter: 112
  episodes_total: 32900
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2599156927858663
          entropy_coeff: 0.01
          kl: 0.014731068288079683
          policy_loss: -0.0762932083903788
          total_loss: 0.09180384700576592
          vf_explained_var: 0.9364080429077148
          vf_loss: 0.15713699656323746
    num_agent_steps_sampled: 3018792
    num_agent_steps_trained: 3018792
    num_steps_sampled: 3018792
    num_steps_trained: 301

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,302,46331.1,3018792,4.00616,14.48,-1.56,89.6964




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3028788
  custom_metrics: {}
  date: 2021-11-08_03-08-25
  done: false
  episode_len_mean: 89.75892857142857
  episode_media: {}
  episode_reward_max: 12.810000000000013
  episode_reward_mean: 3.6880357142857227
  episode_reward_min: -1.870000000000001
  episodes_this_iter: 112
  episodes_total: 33012
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.287842667612255
          entropy_coeff: 0.01
          kl: 0.013424222039778225
          policy_loss: -0.07897596758368433
          total_loss: 0.07609384566481807
          vf_explained_var: 0.9320546388626099
          vf_loss: 0.14736618235325202
    num_agent_steps_sampled: 3028788
    num_agent_steps_trained: 3028788
    num_steps_sampled: 3028788
    num_steps_trained: 302

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,303,46484.3,3028788,3.68804,12.81,-1.87,89.7589




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3038784
  custom_metrics: {}
  date: 2021-11-08_03-11-10
  done: false
  episode_len_mean: 89.01801801801801
  episode_media: {}
  episode_reward_max: 10.980000000000015
  episode_reward_mean: 4.179639639639649
  episode_reward_min: -1.1500000000000004
  episodes_this_iter: 111
  episodes_total: 33123
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2663751411641764
          entropy_coeff: 0.01
          kl: 0.014850025736966458
          policy_loss: -0.07677815055203999
          total_loss: 0.0991389229766324
          vf_explained_var: 0.9334408044815063
          vf_loss: 0.16475060880502573
    num_agent_steps_sampled: 3038784
    num_agent_steps_trained: 3038784
    num_steps_sampled: 3038784
    num_steps_trained: 303

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,304,46649.7,3038784,4.17964,10.98,-1.15,89.018


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3048780
  custom_metrics: {}
  date: 2021-11-08_03-13-29
  done: false
  episode_len_mean: 91.73636363636363
  episode_media: {}
  episode_reward_max: 10.960000000000013
  episode_reward_mean: 3.747363636363646
  episode_reward_min: -1.2000000000000004
  episodes_this_iter: 110
  episodes_total: 33233
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.281994484632443
          entropy_coeff: 0.01
          kl: 0.013100208095922009
          policy_loss: -0.07689594034678661
          total_loss: 0.06970502359187629
          vf_explained_var: 0.9314793348312378
          vf_loss: 0.13957699646090724
    num_agent_steps_sampled: 3048780
    num_agent_steps_trained: 3048780
    num_steps_sampled: 3048780
    num_steps_trained: 304

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,305,46788.8,3048780,3.74736,10.96,-1.2,91.7364




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3058776
  custom_metrics: {}
  date: 2021-11-08_03-16-16
  done: false
  episode_len_mean: 90.46363636363637
  episode_media: {}
  episode_reward_max: 13.050000000000013
  episode_reward_mean: 3.590090909090918
  episode_reward_min: -1.6200000000000006
  episodes_this_iter: 110
  episodes_total: 33343
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.285744542341966
          entropy_coeff: 0.01
          kl: 0.014225853104995082
          policy_loss: -0.07215627933748894
          total_loss: 0.08405771514830682
          vf_explained_var: 0.9334003925323486
          vf_loss: 0.14666316681820105
    num_agent_steps_sampled: 3058776
    num_agent_steps_trained: 3058776
    num_steps_sampled: 3058776
    num_steps_trained: 305

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,306,46956.1,3058776,3.59009,13.05,-1.62,90.4636




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3068772
  custom_metrics: {}
  date: 2021-11-08_03-19-11
  done: false
  episode_len_mean: 90.86363636363636
  episode_media: {}
  episode_reward_max: 12.760000000000018
  episode_reward_mean: 4.01881818181819
  episode_reward_min: -1.4800000000000006
  episodes_this_iter: 110
  episodes_total: 33453
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.284923804519523
          entropy_coeff: 0.01
          kl: 0.01374788412219921
          policy_loss: -0.07118433981847305
          total_loss: 0.09003026024717041
          vf_explained_var: 0.9300100803375244
          vf_loss: 0.15274443813384725
    num_agent_steps_sampled: 3068772
    num_agent_steps_trained: 3068772
    num_steps_sampled: 3068772
    num_steps_trained: 30687

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,307,47130.5,3068772,4.01882,12.76,-1.48,90.8636




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3078768
  custom_metrics: {}
  date: 2021-11-08_03-21-44
  done: false
  episode_len_mean: 91.78899082568807
  episode_media: {}
  episode_reward_max: 15.00000000000001
  episode_reward_mean: 3.627431192660558
  episode_reward_min: -1.8400000000000012
  episodes_this_iter: 109
  episodes_total: 33562
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2887985579987875
          entropy_coeff: 0.01
          kl: 0.01396530446981555
          policy_loss: -0.07679112601356629
          total_loss: 0.08304288830194208
          vf_explained_var: 0.9109708070755005
          vf_loss: 0.15090729041168324
    num_agent_steps_sampled: 3078768
    num_agent_steps_trained: 3078768
    num_steps_sampled: 3078768
    num_steps_trained: 3078

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,308,47283.4,3078768,3.62743,15,-1.84,91.789




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3088764
  custom_metrics: {}
  date: 2021-11-08_03-24-15
  done: false
  episode_len_mean: 91.44036697247707
  episode_media: {}
  episode_reward_max: 12.910000000000016
  episode_reward_mean: 3.8462385321101005
  episode_reward_min: -1.4700000000000006
  episodes_this_iter: 109
  episodes_total: 33671
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.280952625396924
          entropy_coeff: 0.01
          kl: 0.014600913969998764
          policy_loss: -0.07480576055713443
          total_loss: 0.11316714617781914
          vf_explained_var: 0.915919840335846
          vf_loss: 0.1775197255735596
    num_agent_steps_sampled: 3088764
    num_agent_steps_trained: 3088764
    num_steps_sampled: 3088764
    num_steps_trained: 3088

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,309,47434.8,3088764,3.84624,12.91,-1.47,91.4404




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3098760
  custom_metrics: {}
  date: 2021-11-08_03-27-02
  done: false
  episode_len_mean: 90.14414414414415
  episode_media: {}
  episode_reward_max: 10.820000000000014
  episode_reward_mean: 3.309549549549558
  episode_reward_min: -1.7200000000000006
  episodes_this_iter: 111
  episodes_total: 33782
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2972291950486663
          entropy_coeff: 0.01
          kl: 0.01256793070260167
          policy_loss: -0.07781468053372242
          total_loss: 0.06346588053254999
          vf_explained_var: 0.9301301836967468
          vf_loss: 0.13562153515079592
    num_agent_steps_sampled: 3098760
    num_agent_steps_trained: 3098760
    num_steps_sampled: 3098760
    num_steps_trained: 309

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,310,47601.5,3098760,3.30955,10.82,-1.72,90.1441




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3108756
  custom_metrics: {}
  date: 2021-11-08_03-29-43
  done: false
  episode_len_mean: 89.47321428571429
  episode_media: {}
  episode_reward_max: 12.910000000000014
  episode_reward_mean: 3.75517857142858
  episode_reward_min: -1.7300000000000006
  episodes_this_iter: 112
  episodes_total: 33894
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.290946831051101
          entropy_coeff: 0.01
          kl: 0.01303914766673555
          policy_loss: -0.07572113300681624
          total_loss: 0.06706725331182536
          vf_explained_var: 0.9332742691040039
          vf_loss: 0.13599304688903383
    num_agent_steps_sampled: 3108756
    num_agent_steps_trained: 3108756
    num_steps_sampled: 3108756
    num_steps_trained: 31087

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,311,47762.3,3108756,3.75518,12.91,-1.73,89.4732




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3118752
  custom_metrics: {}
  date: 2021-11-08_03-32-16
  done: false
  episode_len_mean: 90.42727272727272
  episode_media: {}
  episode_reward_max: 13.040000000000013
  episode_reward_mean: 3.613545454545462
  episode_reward_min: -1.910000000000001
  episodes_this_iter: 110
  episodes_total: 34004
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.297121034320603
          entropy_coeff: 0.01
          kl: 0.013502955056141934
          policy_loss: -0.07788110995649272
          total_loss: 0.07886505857683145
          vf_explained_var: 0.9058569669723511
          vf_loss: 0.14895595845042003
    num_agent_steps_sampled: 3118752
    num_agent_steps_trained: 3118752
    num_steps_sampled: 3118752
    num_steps_trained: 3118

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,312,47915.3,3118752,3.61355,13.04,-1.91,90.4273




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3128748
  custom_metrics: {}
  date: 2021-11-08_03-34-58
  done: false
  episode_len_mean: 90.49549549549549
  episode_media: {}
  episode_reward_max: 12.720000000000017
  episode_reward_mean: 3.7445945945946035
  episode_reward_min: -1.7900000000000011
  episodes_this_iter: 111
  episodes_total: 34115
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2678800847795273
          entropy_coeff: 0.01
          kl: 0.014236405219396585
          policy_loss: -0.0746441928924531
          total_loss: 0.08729449518056762
          vf_explained_var: 0.9239442348480225
          vf_loss: 0.1521851779256239
    num_agent_steps_sampled: 3128748
    num_agent_steps_trained: 3128748
    num_steps_sampled: 3128748
    num_steps_trained: 312

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,313,48077.8,3128748,3.74459,12.72,-1.79,90.4955




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3138744
  custom_metrics: {}
  date: 2021-11-08_03-37-33
  done: false
  episode_len_mean: 89.63063063063063
  episode_media: {}
  episode_reward_max: 12.670000000000012
  episode_reward_mean: 3.546846846846855
  episode_reward_min: -1.6000000000000008
  episodes_this_iter: 111
  episodes_total: 34226
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2740612873664268
          entropy_coeff: 0.01
          kl: 0.013128001422645199
          policy_loss: -0.07927137994740763
          total_loss: 0.041883302139293435
          vf_explained_var: 0.938593327999115
          vf_loss: 0.11398806595999715
    num_agent_steps_sampled: 3138744
    num_agent_steps_trained: 3138744
    num_steps_sampled: 3138744
    num_steps_trained: 31

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,314,48232.1,3138744,3.54685,12.67,-1.6,89.6306




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3148740
  custom_metrics: {}
  date: 2021-11-08_03-40-51
  done: false
  episode_len_mean: 90.31531531531532
  episode_media: {}
  episode_reward_max: 14.300000000000017
  episode_reward_mean: 3.8023423423423517
  episode_reward_min: -1.3900000000000006
  episodes_this_iter: 111
  episodes_total: 34337
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2907290059277137
          entropy_coeff: 0.01
          kl: 0.013333596729805141
          policy_loss: -0.0763426824624085
          total_loss: 0.060668563592828746
          vf_explained_var: 0.9384514093399048
          vf_loss: 0.12954293513495443
    num_agent_steps_sampled: 3148740
    num_agent_steps_trained: 3148740
    num_steps_sampled: 3148740
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,315,48430.6,3148740,3.80234,14.3,-1.39,90.3153




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3158736
  custom_metrics: {}
  date: 2021-11-08_03-43-31
  done: false
  episode_len_mean: 89.53571428571429
  episode_media: {}
  episode_reward_max: 12.630000000000015
  episode_reward_mean: 3.4510714285714363
  episode_reward_min: -1.800000000000001
  episodes_this_iter: 112
  episodes_total: 34449
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.276303341246059
          entropy_coeff: 0.01
          kl: 0.013932581506874502
          policy_loss: -0.0770833728914587
          total_loss: 0.07546050351622523
          vf_explained_var: 0.9285145998001099
          vf_loss: 0.14356674721671475
    num_agent_steps_sampled: 3158736
    num_agent_steps_trained: 3158736
    num_steps_sampled: 3158736
    num_steps_trained: 3158

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,316,48590,3158736,3.45107,12.63,-1.8,89.5357




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3168732
  custom_metrics: {}
  date: 2021-11-08_03-46-35
  done: false
  episode_len_mean: 89.70270270270271
  episode_media: {}
  episode_reward_max: 10.600000000000016
  episode_reward_mean: 3.814594594594603
  episode_reward_min: -1.7200000000000006
  episodes_this_iter: 111
  episodes_total: 34560
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2679415747650666
          entropy_coeff: 0.01
          kl: 0.014242918059189327
          policy_loss: -0.07670866087333769
          total_loss: 0.08233193575651344
          vf_explained_var: 0.9261979460716248
          vf_loss: 0.149272863765876
    num_agent_steps_sampled: 3168732
    num_agent_steps_trained: 3168732
    num_steps_sampled: 3168732
    num_steps_trained: 3168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,317,48773.8,3168732,3.81459,10.6,-1.72,89.7027




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3178728
  custom_metrics: {}
  date: 2021-11-08_03-49-11
  done: false
  episode_len_mean: 91.5229357798165
  episode_media: {}
  episode_reward_max: 10.780000000000012
  episode_reward_mean: 3.8151376146789078
  episode_reward_min: -2.0999999999999988
  episodes_this_iter: 109
  episodes_total: 34669
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2731873722157925
          entropy_coeff: 0.01
          kl: 0.013089692153971934
          policy_loss: -0.0771182718471839
          total_loss: 0.06669757341583953
          vf_explained_var: 0.9316648244857788
          vf_loss: 0.13672776437467998
    num_agent_steps_sampled: 3178728
    num_agent_steps_trained: 3178728
    num_steps_sampled: 3178728
    num_steps_trained: 317

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,318,48930.6,3178728,3.81514,10.78,-2.1,91.5229




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3188724
  custom_metrics: {}
  date: 2021-11-08_03-51-55
  done: false
  episode_len_mean: 92.83177570093459
  episode_media: {}
  episode_reward_max: 10.760000000000018
  episode_reward_mean: 3.447757009345802
  episode_reward_min: -1.4300000000000006
  episodes_this_iter: 107
  episodes_total: 34776
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2932657015629303
          entropy_coeff: 0.01
          kl: 0.01343983427002958
          policy_loss: -0.07572020979948406
          total_loss: 0.07260781872508108
          vf_explained_var: 0.9288674592971802
          vf_loss: 0.14064306238602498
    num_agent_steps_sampled: 3188724
    num_agent_steps_trained: 3188724
    num_steps_sampled: 3188724
    num_steps_trained: 318

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,319,49094.5,3188724,3.44776,10.76,-1.43,92.8318




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3198720
  custom_metrics: {}
  date: 2021-11-08_03-54-33
  done: false
  episode_len_mean: 90.15315315315316
  episode_media: {}
  episode_reward_max: 18.789999999999964
  episode_reward_mean: 3.7576576576576657
  episode_reward_min: -1.4800000000000009
  episodes_this_iter: 111
  episodes_total: 34887
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2991710247137607
          entropy_coeff: 0.01
          kl: 0.015282795142004531
          policy_loss: -0.07281927071973426
          total_loss: 0.11397586659743236
          vf_explained_var: 0.9160694479942322
          vf_loss: 0.17497073030019672
    num_agent_steps_sampled: 3198720
    num_agent_steps_trained: 3198720
    num_steps_sampled: 3198720
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,320,49252.1,3198720,3.75766,18.79,-1.48,90.1532




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3208716
  custom_metrics: {}
  date: 2021-11-08_03-57-17
  done: false
  episode_len_mean: 91.06363636363636
  episode_media: {}
  episode_reward_max: 16.80999999999997
  episode_reward_mean: 3.7151818181818257
  episode_reward_min: -1.330000000000001
  episodes_this_iter: 110
  episodes_total: 34997
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.26000243557824
          entropy_coeff: 0.01
          kl: 0.013631245583280788
          policy_loss: -0.07614770699531222
          total_loss: 0.08049227439631254
          vf_explained_var: 0.9289596676826477
          vf_loss: 0.14818632270121931
    num_agent_steps_sampled: 3208716
    num_agent_steps_trained: 3208716
    num_steps_sampled: 3208716
    num_steps_trained: 32087

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,321,49416,3208716,3.71518,16.81,-1.33,91.0636


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3218712
  custom_metrics: {}
  date: 2021-11-08_03-59-38
  done: false
  episode_len_mean: 91.1
  episode_media: {}
  episode_reward_max: 12.830000000000013
  episode_reward_mean: 3.8554545454545543
  episode_reward_min: -0.9600000000000005
  episodes_this_iter: 110
  episodes_total: 35107
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2457307173655585
          entropy_coeff: 0.01
          kl: 0.013947001417969986
          policy_loss: -0.07674821579797018
          total_loss: 0.08741023278691702
          vf_explained_var: 0.9404701590538025
          vf_loss: 0.15484274277447635
    num_agent_steps_sampled: 3218712
    num_agent_steps_trained: 3218712
    num_steps_sampled: 3218712
    num_steps_trained: 3218712
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,322,49556.9,3218712,3.85545,12.83,-0.96,91.1




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3228708
  custom_metrics: {}
  date: 2021-11-08_04-02-27
  done: false
  episode_len_mean: 89.66666666666667
  episode_media: {}
  episode_reward_max: 11.200000000000014
  episode_reward_mean: 3.853333333333341
  episode_reward_min: -1.5600000000000005
  episodes_this_iter: 111
  episodes_total: 35218
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2550803098923122
          entropy_coeff: 0.01
          kl: 0.01383104390054708
          policy_loss: -0.0773248737407291
          total_loss: 0.07352836860550774
          vf_explained_var: 0.9392185807228088
          vf_loss: 0.1418951977060264
    num_agent_steps_sampled: 3228708
    num_agent_steps_trained: 3228708
    num_steps_sampled: 3228708
    num_steps_trained: 32287

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,323,49726.4,3228708,3.85333,11.2,-1.56,89.6667




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3238704
  custom_metrics: {}
  date: 2021-11-08_04-05-16
  done: false
  episode_len_mean: 89.58928571428571
  episode_media: {}
  episode_reward_max: 11.020000000000014
  episode_reward_mean: 3.4274107142857226
  episode_reward_min: -1.8500000000000014
  episodes_this_iter: 112
  episodes_total: 35330
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2662578833408844
          entropy_coeff: 0.01
          kl: 0.01485846218358801
          policy_loss: -0.07502348394825673
          total_loss: 0.0824170348385715
          vf_explained_var: 0.9279797673225403
          vf_loss: 0.14625366222845693
    num_agent_steps_sampled: 3238704
    num_agent_steps_trained: 3238704
    num_steps_sampled: 3238704
    num_steps_trained: 323

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,324,49894.9,3238704,3.42741,11.02,-1.85,89.5893


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3248700
  custom_metrics: {}
  date: 2021-11-08_04-07-35
  done: false
  episode_len_mean: 92.34862385321101
  episode_media: {}
  episode_reward_max: 12.880000000000015
  episode_reward_mean: 3.7699082568807425
  episode_reward_min: -1.970000000000001
  episodes_this_iter: 109
  episodes_total: 35439
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2790074994421414
          entropy_coeff: 0.01
          kl: 0.013657404845905987
          policy_loss: -0.07633908048081092
          total_loss: 0.06611541195685028
          vf_explained_var: 0.9357666969299316
          vf_loss: 0.13413129039267954
    num_agent_steps_sampled: 3248700
    num_agent_steps_trained: 3248700
    num_steps_sampled: 3248700
    num_steps_trained: 32

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,325,50034.4,3248700,3.76991,12.88,-1.97,92.3486




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3258696
  custom_metrics: {}
  date: 2021-11-08_04-11-02
  done: false
  episode_len_mean: 92.3425925925926
  episode_media: {}
  episode_reward_max: 15.240000000000007
  episode_reward_mean: 3.807037037037045
  episode_reward_min: -2.000000000000001
  episodes_this_iter: 108
  episodes_total: 35547
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2522569035872433
          entropy_coeff: 0.01
          kl: 0.015181848361892932
          policy_loss: -0.07024558474047062
          total_loss: 0.11883553065136712
          vf_explained_var: 0.9140962958335876
          vf_loss: 0.17701753510687596
    num_agent_steps_sampled: 3258696
    num_agent_steps_trained: 3258696
    num_steps_sampled: 3258696
    num_steps_trained: 3258

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,326,50240.8,3258696,3.80704,15.24,-2,92.3426


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3268692
  custom_metrics: {}
  date: 2021-11-08_04-13-23
  done: false
  episode_len_mean: 92.31481481481481
  episode_media: {}
  episode_reward_max: 12.910000000000016
  episode_reward_mean: 4.154351851851861
  episode_reward_min: -2.0300000000000002
  episodes_this_iter: 108
  episodes_total: 35655
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.253525491861197
          entropy_coeff: 0.01
          kl: 0.013922393516242346
          policy_loss: -0.07165008771718823
          total_loss: 0.09354561330575464
          vf_explained_var: 0.931423008441925
          vf_loss: 0.15601400246955135
    num_agent_steps_sampled: 3268692
    num_agent_steps_trained: 3268692
    num_steps_sampled: 3268692
    num_steps_trained: 3268

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,327,50381.4,3268692,4.15435,12.91,-2.03,92.3148




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3278688
  custom_metrics: {}
  date: 2021-11-08_04-16-42
  done: false
  episode_len_mean: 89.7927927927928
  episode_media: {}
  episode_reward_max: 12.710000000000006
  episode_reward_mean: 3.4775675675675757
  episode_reward_min: -2.169999999999999
  episodes_this_iter: 111
  episodes_total: 35766
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.252226863661383
          entropy_coeff: 0.01
          kl: 0.013458504860437183
          policy_loss: -0.07373666662054185
          total_loss: 0.07138166663945358
          vf_explained_var: 0.9339299201965332
          vf_loss: 0.1369804450016246
    num_agent_steps_sampled: 3278688
    num_agent_steps_trained: 3278688
    num_steps_sampled: 3278688
    num_steps_trained: 32786

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,328,50581.1,3278688,3.47757,12.71,-2.17,89.7928


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3288684
  custom_metrics: {}
  date: 2021-11-08_04-19-03
  done: false
  episode_len_mean: 92.43119266055047
  episode_media: {}
  episode_reward_max: 12.750000000000014
  episode_reward_mean: 3.0718348623853284
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 109
  episodes_total: 35875
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2640748327613895
          entropy_coeff: 0.01
          kl: 0.013992209450282827
          policy_loss: -0.0763209345869911
          total_loss: 0.08380611196048876
          vf_explained_var: 0.909416913986206
          vf_loss: 0.15089179233719524
    num_agent_steps_sampled: 3288684
    num_agent_steps_trained: 3288684
    num_steps_sampled: 3288684
    num_steps_trained: 328

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,329,50722.2,3288684,3.07183,12.75,-2.05,92.4312




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3298680
  custom_metrics: {}
  date: 2021-11-08_04-22-09
  done: false
  episode_len_mean: 88.53097345132744
  episode_media: {}
  episode_reward_max: 10.770000000000016
  episode_reward_mean: 3.8531858407079724
  episode_reward_min: -1.7100000000000009
  episodes_this_iter: 113
  episodes_total: 35988
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2343690144710053
          entropy_coeff: 0.01
          kl: 0.014473775831268403
          policy_loss: -0.07305499990032906
          total_loss: 0.08108648521881391
          vf_explained_var: 0.9356955289840698
          vf_loss: 0.14351210335954132
    num_agent_steps_sampled: 3298680
    num_agent_steps_trained: 3298680
    num_steps_sampled: 3298680
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,330,50907.4,3298680,3.85319,10.77,-1.71,88.531




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3308676
  custom_metrics: {}
  date: 2021-11-08_04-25-03
  done: false
  episode_len_mean: 89.38392857142857
  episode_media: {}
  episode_reward_max: 14.890000000000015
  episode_reward_mean: 3.734642857142865
  episode_reward_min: -1.8700000000000008
  episodes_this_iter: 112
  episodes_total: 36100
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.232542047031924
          entropy_coeff: 0.01
          kl: 0.014222499178490217
          policy_loss: -0.07305457670018714
          total_loss: 0.10267128603628431
          vf_explained_var: 0.9333191514015198
          vf_loss: 0.165650651552993
    num_agent_steps_sampled: 3308676
    num_agent_steps_trained: 3308676
    num_steps_sampled: 3308676
    num_steps_trained: 33086

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,331,51082,3308676,3.73464,14.89,-1.87,89.3839




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3318672
  custom_metrics: {}
  date: 2021-11-08_04-27-56
  done: false
  episode_len_mean: 88.22321428571429
  episode_media: {}
  episode_reward_max: 11.160000000000013
  episode_reward_mean: 3.2481250000000066
  episode_reward_min: -2.259999999999996
  episodes_this_iter: 112
  episodes_total: 36212
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2501306020296536
          entropy_coeff: 0.01
          kl: 0.013096010688332311
          policy_loss: -0.07537526061646958
          total_loss: 0.07138223890851961
          vf_explained_var: 0.915462851524353
          vf_loss: 0.13942445534416753
    num_agent_steps_sampled: 3318672
    num_agent_steps_trained: 3318672
    num_steps_sampled: 3318672
    num_steps_trained: 331

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,332,51254.9,3318672,3.24813,11.16,-2.26,88.2232




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3328668
  custom_metrics: {}
  date: 2021-11-08_04-30-48
  done: false
  episode_len_mean: 90.22321428571429
  episode_media: {}
  episode_reward_max: 12.950000000000014
  episode_reward_mean: 3.585000000000008
  episode_reward_min: -1.7200000000000009
  episodes_this_iter: 112
  episodes_total: 36324
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2562202938601503
          entropy_coeff: 0.01
          kl: 0.013195319750160705
          policy_loss: -0.07154013545324023
          total_loss: 0.08359736031056661
          vf_explained_var: 0.9307571053504944
          vf_loss: 0.14763910983585649
    num_agent_steps_sampled: 3328668
    num_agent_steps_trained: 3328668
    num_steps_sampled: 3328668
    num_steps_trained: 33

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,333,51426.3,3328668,3.585,12.95,-1.72,90.2232




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3338664
  custom_metrics: {}
  date: 2021-11-08_04-33-39
  done: false
  episode_len_mean: 89.38738738738739
  episode_media: {}
  episode_reward_max: 14.330000000000016
  episode_reward_mean: 3.6994594594594674
  episode_reward_min: -1.3900000000000008
  episodes_this_iter: 111
  episodes_total: 36435
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2495338327864296
          entropy_coeff: 0.01
          kl: 0.013905552555137606
          policy_loss: -0.07486971310124947
          total_loss: 0.06907785080978249
          vf_explained_var: 0.9280301332473755
          vf_loss: 0.13476431482813805
    num_agent_steps_sampled: 3338664
    num_agent_steps_trained: 3338664
    num_steps_sampled: 3338664
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,334,51597.5,3338664,3.69946,14.33,-1.39,89.3874




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3348660
  custom_metrics: {}
  date: 2021-11-08_04-36-44
  done: false
  episode_len_mean: 87.86842105263158
  episode_media: {}
  episode_reward_max: 14.580000000000016
  episode_reward_mean: 3.870087719298253
  episode_reward_min: -1.2300000000000006
  episodes_this_iter: 114
  episodes_total: 36549
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.255609434995896
          entropy_coeff: 0.01
          kl: 0.014276525115381565
          policy_loss: -0.06978669793337074
          total_loss: 0.10596141854746856
          vf_explained_var: 0.9299802780151367
          vf_loss: 0.16578050036238046
    num_agent_steps_sampled: 3348660
    num_agent_steps_trained: 3348660
    num_steps_sampled: 3348660
    num_steps_trained: 334

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,335,51783,3348660,3.87009,14.58,-1.23,87.8684




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3358656
  custom_metrics: {}
  date: 2021-11-08_04-39-34
  done: false
  episode_len_mean: 87.47368421052632
  episode_media: {}
  episode_reward_max: 13.050000000000013
  episode_reward_mean: 4.184912280701763
  episode_reward_min: -1.5600000000000005
  episodes_this_iter: 114
  episodes_total: 36663
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2320980255420393
          entropy_coeff: 0.01
          kl: 0.013730207654299173
          policy_loss: -0.07271702786127472
          total_loss: 0.09022141946877679
          vf_explained_var: 0.9366027116775513
          vf_loss: 0.15398029644742736
    num_agent_steps_sampled: 3358656
    num_agent_steps_trained: 3358656
    num_steps_sampled: 3358656
    num_steps_trained: 33

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,336,51952.4,3358656,4.18491,13.05,-1.56,87.4737




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3368652
  custom_metrics: {}
  date: 2021-11-08_04-43-02
  done: false
  episode_len_mean: 89.80357142857143
  episode_media: {}
  episode_reward_max: 12.570000000000013
  episode_reward_mean: 4.0216964285714365
  episode_reward_min: -0.9600000000000009
  episodes_this_iter: 112
  episodes_total: 36775
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2454314005680573
          entropy_coeff: 0.01
          kl: 0.013865022893170716
          policy_loss: -0.07458895579553568
          total_loss: 0.09464355071958823
          vf_explained_var: 0.9256706833839417
          vf_loss: 0.16010056439086667
    num_agent_steps_sampled: 3368652
    num_agent_steps_trained: 3368652
    num_steps_sampled: 3368652
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,337,52160.2,3368652,4.0217,12.57,-0.96,89.8036




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3378648
  custom_metrics: {}
  date: 2021-11-08_04-45-36
  done: false
  episode_len_mean: 90.43243243243244
  episode_media: {}
  episode_reward_max: 12.760000000000012
  episode_reward_mean: 4.05207207207208
  episode_reward_min: -1.1400000000000006
  episodes_this_iter: 111
  episodes_total: 36886
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2372478362841486
          entropy_coeff: 0.01
          kl: 0.013550791104758927
          policy_loss: -0.07415636490361813
          total_loss: 0.07491222801658078
          vf_explained_var: 0.9370580315589905
          vf_loss: 0.1405706743709743
    num_agent_steps_sampled: 3378648
    num_agent_steps_trained: 3378648
    num_steps_sampled: 3378648
    num_steps_trained: 3378

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,338,52314.4,3378648,4.05207,12.76,-1.14,90.4324




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3388644
  custom_metrics: {}
  date: 2021-11-08_04-48-39
  done: false
  episode_len_mean: 88.83783783783784
  episode_media: {}
  episode_reward_max: 12.830000000000014
  episode_reward_mean: 3.978468468468477
  episode_reward_min: -1.7100000000000009
  episodes_this_iter: 111
  episodes_total: 36997
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2373445804302508
          entropy_coeff: 0.01
          kl: 0.014170427849310255
          policy_loss: -0.07549467953399588
          total_loss: 0.0938503842195894
          vf_explained_var: 0.9370816349983215
          vf_loss: 0.1594365029810713
    num_agent_steps_sampled: 3388644
    num_agent_steps_trained: 3388644
    num_steps_sampled: 3388644
    num_steps_trained: 3388

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,339,52497,3388644,3.97847,12.83,-1.71,88.8378


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3398640
  custom_metrics: {}
  date: 2021-11-08_04-51-01
  done: false
  episode_len_mean: 91.03636363636363
  episode_media: {}
  episode_reward_max: 13.08000000000001
  episode_reward_mean: 3.62972727272728
  episode_reward_min: -1.3300000000000003
  episodes_this_iter: 110
  episodes_total: 37107
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2434215708675547
          entropy_coeff: 0.01
          kl: 0.014149978707341769
          policy_loss: -0.0734064681463453
          total_loss: 0.0957532468976246
          vf_explained_var: 0.9275050759315491
          vf_loss: 0.15935851017920635
    num_agent_steps_sampled: 3398640
    num_agent_steps_trained: 3398640
    num_steps_sampled: 3398640
    num_steps_trained: 339864

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,340,52639,3398640,3.62973,13.08,-1.33,91.0364




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3408636
  custom_metrics: {}
  date: 2021-11-08_04-54-18
  done: false
  episode_len_mean: 89.27678571428571
  episode_media: {}
  episode_reward_max: 14.890000000000013
  episode_reward_mean: 3.6683035714285794
  episode_reward_min: -1.6500000000000006
  episodes_this_iter: 112
  episodes_total: 37219
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2352333527344923
          entropy_coeff: 0.01
          kl: 0.013798220867827348
          policy_loss: -0.07260048294949353
          total_loss: 0.11722193813890729
          vf_explained_var: 0.9051232933998108
          vf_loss: 0.18074068075412103
    num_agent_steps_sampled: 3408636
    num_agent_steps_trained: 3408636
    num_steps_sampled: 3408636
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,341,52836,3408636,3.6683,14.89,-1.65,89.2768




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3418632
  custom_metrics: {}
  date: 2021-11-08_04-57-24
  done: false
  episode_len_mean: 87.97368421052632
  episode_media: {}
  episode_reward_max: 12.770000000000016
  episode_reward_mean: 3.4905263157894817
  episode_reward_min: -1.6400000000000008
  episodes_this_iter: 114
  episodes_total: 37333
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.233782192784497
          entropy_coeff: 0.01
          kl: 0.014002884676449133
          policy_loss: -0.06933212568028232
          total_loss: 0.09364019586808152
          vf_explained_var: 0.9228399991989136
          vf_loss: 0.15340982081575527
    num_agent_steps_sampled: 3418632
    num_agent_steps_trained: 3418632
    num_steps_sampled: 3418632
    num_steps_trained: 34

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,342,53022.7,3418632,3.49053,12.77,-1.64,87.9737




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3428628
  custom_metrics: {}
  date: 2021-11-08_04-59-58
  done: false
  episode_len_mean: 89.9375
  episode_media: {}
  episode_reward_max: 14.990000000000013
  episode_reward_mean: 3.2680357142857224
  episode_reward_min: -1.6700000000000008
  episodes_this_iter: 112
  episodes_total: 37445
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2529360919936092
          entropy_coeff: 0.01
          kl: 0.013658910739881354
          policy_loss: -0.07228654913731619
          total_loss: 0.09598119093630558
          vf_explained_var: 0.9176378846168518
          vf_loss: 0.15968039474553533
    num_agent_steps_sampled: 3428628
    num_agent_steps_trained: 3428628
    num_steps_sampled: 3428628
    num_steps_trained: 3428628
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,343,53176.6,3428628,3.26804,14.99,-1.67,89.9375




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3438624
  custom_metrics: {}
  date: 2021-11-08_05-02-31
  done: false
  episode_len_mean: 91.19266055045871
  episode_media: {}
  episode_reward_max: 12.640000000000015
  episode_reward_mean: 3.922201834862394
  episode_reward_min: -2.0600000000000005
  episodes_this_iter: 109
  episodes_total: 37554
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2242654339880006
          entropy_coeff: 0.01
          kl: 0.014217291696684306
          policy_loss: -0.07131992530427937
          total_loss: 0.09648177240234919
          vf_explained_var: 0.9283559918403625
          vf_loss: 0.1576555834685126
    num_agent_steps_sampled: 3438624
    num_agent_steps_trained: 3438624
    num_steps_sampled: 3438624
    num_steps_trained: 343

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,344,53328.9,3438624,3.9222,12.64,-2.06,91.1927




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3448620
  custom_metrics: {}
  date: 2021-11-08_05-05-03
  done: false
  episode_len_mean: 92.83177570093459
  episode_media: {}
  episode_reward_max: 13.210000000000012
  episode_reward_mean: 3.7204672897196343
  episode_reward_min: -1.5200000000000005
  episodes_this_iter: 107
  episodes_total: 37661
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.245342842534057
          entropy_coeff: 0.01
          kl: 0.013742611822342725
          policy_loss: -0.07400520229154928
          total_loss: 0.0952505141027017
          vf_explained_var: 0.9297173619270325
          vf_loss: 0.16040175639283963
    num_agent_steps_sampled: 3448620
    num_agent_steps_trained: 3448620
    num_steps_sampled: 3448620
    num_steps_trained: 344

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,345,53481.1,3448620,3.72047,13.21,-1.52,92.8318




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3458616
  custom_metrics: {}
  date: 2021-11-08_05-07-53
  done: false
  episode_len_mean: 92.79629629629629
  episode_media: {}
  episode_reward_max: 14.92000000000001
  episode_reward_mean: 3.611018518518527
  episode_reward_min: -1.0100000000000005
  episodes_this_iter: 108
  episodes_total: 37769
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2484480501240136
          entropy_coeff: 0.01
          kl: 0.013678052848185516
          policy_loss: -0.07230657456904395
          total_loss: 0.08383475301516616
          vf_explained_var: 0.9226611256599426
          vf_loss: 0.14746549187116642
    num_agent_steps_sampled: 3458616
    num_agent_steps_trained: 3458616
    num_steps_sampled: 3458616
    num_steps_trained: 345

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,346,53650.7,3458616,3.61102,14.92,-1.01,92.7963




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3468612
  custom_metrics: {}
  date: 2021-11-08_05-10-25
  done: false
  episode_len_mean: 92.36111111111111
  episode_media: {}
  episode_reward_max: 12.730000000000018
  episode_reward_mean: 3.34546296296297
  episode_reward_min: -1.6600000000000008
  episodes_this_iter: 108
  episodes_total: 37877
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.241486157209445
          entropy_coeff: 0.01
          kl: 0.013418731519082296
          policy_loss: -0.07145958872209503
          total_loss: 0.07375239224817891
          vf_explained_var: 0.9285520315170288
          vf_loss: 0.13705729361400645
    num_agent_steps_sampled: 3468612
    num_agent_steps_trained: 3468612
    num_steps_sampled: 3468612
    num_steps_trained: 3468

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,347,53802.6,3468612,3.34546,12.73,-1.66,92.3611




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3478608
  custom_metrics: {}
  date: 2021-11-08_05-13-12
  done: false
  episode_len_mean: 91.89908256880734
  episode_media: {}
  episode_reward_max: 12.650000000000016
  episode_reward_mean: 3.556697247706431
  episode_reward_min: -1.0800000000000005
  episodes_this_iter: 109
  episodes_total: 37986
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.242501612402435
          entropy_coeff: 0.01
          kl: 0.013464355084053482
          policy_loss: -0.06985651512558644
          total_loss: 0.07007750964476767
          vf_explained_var: 0.9289230704307556
          vf_loss: 0.13168555674358057
    num_agent_steps_sampled: 3478608
    num_agent_steps_trained: 3478608
    num_steps_sampled: 3478608
    num_steps_trained: 347

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,348,53969.9,3478608,3.5567,12.65,-1.08,91.8991




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3488604
  custom_metrics: {}
  date: 2021-11-08_05-16-01
  done: false
  episode_len_mean: 91.55045871559633
  episode_media: {}
  episode_reward_max: 11.230000000000013
  episode_reward_mean: 3.859266055045881
  episode_reward_min: -1.1300000000000006
  episodes_this_iter: 109
  episodes_total: 38095
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2468109513959313
          entropy_coeff: 0.01
          kl: 0.014308046681599889
          policy_loss: -0.07078834888803907
          total_loss: 0.11179647179017974
          vf_explained_var: 0.9267479181289673
          vf_loss: 0.1724574104206175
    num_agent_steps_sampled: 3488604
    num_agent_steps_trained: 3488604
    num_steps_sampled: 3488604
    num_steps_trained: 348

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,349,54138.8,3488604,3.85927,11.23,-1.13,91.5505




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3498600
  custom_metrics: {}
  date: 2021-11-08_05-18-34
  done: false
  episode_len_mean: 92.68518518518519
  episode_media: {}
  episode_reward_max: 13.070000000000014
  episode_reward_mean: 3.622407407407415
  episode_reward_min: -2.0599999999999996
  episodes_this_iter: 108
  episodes_total: 38203
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.225293137273218
          entropy_coeff: 0.01
          kl: 0.01455541058540669
          policy_loss: -0.06935402600842919
          total_loss: 0.09201014508357924
          vf_explained_var: 0.9307939410209656
          vf_loss: 0.1504580564987965
    num_agent_steps_sampled: 3498600
    num_agent_steps_trained: 3498600
    num_steps_sampled: 3498600
    num_steps_trained: 34986

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,350,54292,3498600,3.62241,13.07,-2.06,92.6852




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3508596
  custom_metrics: {}
  date: 2021-11-08_05-21-04
  done: false
  episode_len_mean: 92.58715596330275
  episode_media: {}
  episode_reward_max: 12.920000000000014
  episode_reward_mean: 3.4973394495412924
  episode_reward_min: -1.5800000000000005
  episodes_this_iter: 109
  episodes_total: 38312
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2357284567294977
          entropy_coeff: 0.01
          kl: 0.014656060381935055
          policy_loss: -0.06932840124091022
          total_loss: 0.11255107653987968
          vf_explained_var: 0.9173742532730103
          vf_loss: 0.17084842371459827
    num_agent_steps_sampled: 3508596
    num_agent_steps_trained: 3508596
    num_steps_sampled: 3508596
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,351,54441.6,3508596,3.49734,12.92,-1.58,92.5872




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3518592
  custom_metrics: {}
  date: 2021-11-08_05-23-56
  done: false
  episode_len_mean: 91.56481481481481
  episode_media: {}
  episode_reward_max: 12.010000000000016
  episode_reward_mean: 3.719907407407416
  episode_reward_min: -1.3300000000000003
  episodes_this_iter: 108
  episodes_total: 38420
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.238563604028816
          entropy_coeff: 0.01
          kl: 0.014708764170796819
          policy_loss: -0.0670214005252426
          total_loss: 0.1172587055951739
          vf_explained_var: 0.9212980270385742
          vf_loss: 0.17315733840800504
    num_agent_steps_sampled: 3518592
    num_agent_steps_trained: 3518592
    num_steps_sampled: 3518592
    num_steps_trained: 35185

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,352,54613.5,3518592,3.71991,12.01,-1.33,91.5648




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3528588
  custom_metrics: {}
  date: 2021-11-08_05-26-24
  done: false
  episode_len_mean: 94.48113207547169
  episode_media: {}
  episode_reward_max: 13.14000000000001
  episode_reward_mean: 3.5118867924528394
  episode_reward_min: -1.3000000000000003
  episodes_this_iter: 106
  episodes_total: 38526
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2523372081609874
          entropy_coeff: 0.01
          kl: 0.013249710335648283
          policy_loss: -0.07291444290079113
          total_loss: 0.06302080356285103
          vf_explained_var: 0.937316358089447
          vf_loss: 0.12827412088871257
    num_agent_steps_sampled: 3528588
    num_agent_steps_trained: 3528588
    num_steps_sampled: 3528588
    num_steps_trained: 352

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,353,54761.8,3528588,3.51189,13.14,-1.3,94.4811




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3538584
  custom_metrics: {}
  date: 2021-11-08_05-29-15
  done: false
  episode_len_mean: 94.01886792452831
  episode_media: {}
  episode_reward_max: 11.180000000000014
  episode_reward_mean: 3.9870754716981223
  episode_reward_min: -1.6400000000000008
  episodes_this_iter: 106
  episodes_total: 38632
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2398057254970585
          entropy_coeff: 0.01
          kl: 0.014767240124584526
          policy_loss: -0.06819615016221746
          total_loss: 0.09200278982106182
          vf_explained_var: 0.9232621192932129
          vf_loss: 0.14895537621699848
    num_agent_steps_sampled: 3538584
    num_agent_steps_trained: 3538584
    num_steps_sampled: 3538584
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,354,54932.7,3538584,3.98708,11.18,-1.64,94.0189




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3548580
  custom_metrics: {}
  date: 2021-11-08_05-31-47
  done: false
  episode_len_mean: 93.8411214953271
  episode_media: {}
  episode_reward_max: 13.140000000000011
  episode_reward_mean: 3.600841121495336
  episode_reward_min: -1.6200000000000008
  episodes_this_iter: 107
  episodes_total: 38739
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2300586346887115
          entropy_coeff: 0.01
          kl: 0.014551115972451241
          policy_loss: -0.06903870027337192
          total_loss: 0.10638914453502521
          vf_explained_var: 0.9359985589981079
          vf_loss: 0.16457916888543683
    num_agent_steps_sampled: 3548580
    num_agent_steps_trained: 3548580
    num_steps_sampled: 3548580
    num_steps_trained: 354

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,355,55084.7,3548580,3.60084,13.14,-1.62,93.8411




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3558576
  custom_metrics: {}
  date: 2021-11-08_05-34-30
  done: false
  episode_len_mean: 92.73148148148148
  episode_media: {}
  episode_reward_max: 12.970000000000015
  episode_reward_mean: 4.209907407407417
  episode_reward_min: -1.4500000000000006
  episodes_this_iter: 108
  episodes_total: 38847
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2337730994591345
          entropy_coeff: 0.01
          kl: 0.013822436178079416
          policy_loss: -0.0730097367372523
          total_loss: 0.09360877120883292
          vf_explained_var: 0.9356511235237122
          vf_loss: 0.15746700097448552
    num_agent_steps_sampled: 3558576
    num_agent_steps_trained: 3558576
    num_steps_sampled: 3558576
    num_steps_trained: 355

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,356,55248.1,3558576,4.20991,12.97,-1.45,92.7315




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3568572
  custom_metrics: {}
  date: 2021-11-08_05-37-11
  done: false
  episode_len_mean: 94.9245283018868
  episode_media: {}
  episode_reward_max: 10.510000000000012
  episode_reward_mean: 3.4497169811320836
  episode_reward_min: -1.6800000000000008
  episodes_this_iter: 106
  episodes_total: 38953
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2383927078328583
          entropy_coeff: 0.01
          kl: 0.013921541881304532
          policy_loss: -0.06946550017366042
          total_loss: 0.08440158755446855
          vf_explained_var: 0.9315767884254456
          vf_loss: 0.14453600166827185
    num_agent_steps_sampled: 3568572
    num_agent_steps_trained: 3568572
    num_steps_sampled: 3568572
    num_steps_trained: 35

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,357,55408.4,3568572,3.44972,10.51,-1.68,94.9245




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3578568
  custom_metrics: {}
  date: 2021-11-08_05-40-28
  done: false
  episode_len_mean: 93.29245283018868
  episode_media: {}
  episode_reward_max: 12.980000000000011
  episode_reward_mean: 3.948301886792461
  episode_reward_min: -1.5000000000000007
  episodes_this_iter: 106
  episodes_total: 39059
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2278693086061723
          entropy_coeff: 0.01
          kl: 0.013268942996712645
          policy_loss: -0.0731553165632117
          total_loss: 0.07966884378598542
          vf_explained_var: 0.9345712661743164
          vf_loss: 0.1448745423490102
    num_agent_steps_sampled: 3578568
    num_agent_steps_trained: 3578568
    num_steps_sampled: 3578568
    num_steps_trained: 3578

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,358,55605.7,3578568,3.9483,12.98,-1.5,93.2925




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3588564
  custom_metrics: {}
  date: 2021-11-08_05-43-13
  done: false
  episode_len_mean: 94.26415094339623
  episode_media: {}
  episode_reward_max: 10.830000000000016
  episode_reward_mean: 3.6489622641509523
  episode_reward_min: -1.6500000000000006
  episodes_this_iter: 106
  episodes_total: 39165
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2273643157421015
          entropy_coeff: 0.01
          kl: 0.013430212778234953
          policy_loss: -0.07079231958103994
          total_loss: 0.07046961164595479
          vf_explained_var: 0.9294625520706177
          vf_loss: 0.1329398697337661
    num_agent_steps_sampled: 3588564
    num_agent_steps_trained: 3588564
    num_steps_sampled: 3588564
    num_steps_trained: 35

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,359,55770.4,3588564,3.64896,10.83,-1.65,94.2642


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3598560
  custom_metrics: {}
  date: 2021-11-08_05-45-28
  done: false
  episode_len_mean: 95.95238095238095
  episode_media: {}
  episode_reward_max: 10.950000000000012
  episode_reward_mean: 3.136476190476198
  episode_reward_min: -1.6900000000000008
  episodes_this_iter: 105
  episodes_total: 39270
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.236630449947129
          entropy_coeff: 0.01
          kl: 0.014018558101995154
          policy_loss: -0.07398003312782982
          total_loss: 0.07371509950130414
          vf_explained_var: 0.9301438331604004
          vf_loss: 0.13812540844719634
    num_agent_steps_sampled: 3598560
    num_agent_steps_trained: 3598560
    num_steps_sampled: 3598560
    num_steps_trained: 359

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,360,55905.8,3598560,3.13648,10.95,-1.69,95.9524




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3608556
  custom_metrics: {}
  date: 2021-11-08_05-47-59
  done: false
  episode_len_mean: 95.83653846153847
  episode_media: {}
  episode_reward_max: 12.960000000000013
  episode_reward_mean: 4.12125000000001
  episode_reward_min: -1.4600000000000006
  episodes_this_iter: 104
  episodes_total: 39374
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2119285273755716
          entropy_coeff: 0.01
          kl: 0.013933339799318052
          policy_loss: -0.07228747834889297
          total_loss: 0.09369607929808971
          vf_explained_var: 0.9349416494369507
          vf_loss: 0.15636095257682933
    num_agent_steps_sampled: 3608556
    num_agent_steps_trained: 3608556
    num_steps_sampled: 3608556
    num_steps_trained: 360

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,361,56056.2,3608556,4.12125,12.96,-1.46,95.8365




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3618552
  custom_metrics: {}
  date: 2021-11-08_05-50-44
  done: false
  episode_len_mean: 91.75
  episode_media: {}
  episode_reward_max: 11.150000000000011
  episode_reward_mean: 4.094074074074083
  episode_reward_min: -1.750000000000001
  episodes_this_iter: 108
  episodes_total: 39482
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.19566532750415
          entropy_coeff: 0.01
          kl: 0.013330398772573979
          policy_loss: -0.07201183079463294
          total_loss: 0.10627883282593555
          vf_explained_var: 0.9211147427558899
          vf_loss: 0.16987900198238273
    num_agent_steps_sampled: 3618552
    num_agent_steps_trained: 3618552
    num_steps_sampled: 3618552
    num_steps_trained: 3618552
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,362,56221.7,3618552,4.09407,11.15,-1.75,91.75




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3628548
  custom_metrics: {}
  date: 2021-11-08_05-53-17
  done: false
  episode_len_mean: 96.52884615384616
  episode_media: {}
  episode_reward_max: 14.840000000000012
  episode_reward_mean: 3.714903846153855
  episode_reward_min: -1.9000000000000006
  episodes_this_iter: 104
  episodes_total: 39586
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.235807717152131
          entropy_coeff: 0.01
          kl: 0.014040543590248604
          policy_loss: -0.06750000798676768
          total_loss: 0.10675893277089056
          vf_explained_var: 0.929010272026062
          vf_loss: 0.16463090382898465
    num_agent_steps_sampled: 3628548
    num_agent_steps_trained: 3628548
    num_steps_sampled: 3628548
    num_steps_trained: 3628

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,363,56374.1,3628548,3.7149,14.84,-1.9,96.5288




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3638544
  custom_metrics: {}
  date: 2021-11-08_05-56-15
  done: false
  episode_len_mean: 93.1214953271028
  episode_media: {}
  episode_reward_max: 10.940000000000017
  episode_reward_mean: 3.4078504672897267
  episode_reward_min: -1.3200000000000005
  episodes_this_iter: 107
  episodes_total: 39693
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2336977163950604
          entropy_coeff: 0.01
          kl: 0.014147902421439899
          policy_loss: -0.0691674546760499
          total_loss: 0.09847209720561902
          vf_explained_var: 0.920369029045105
          vf_loss: 0.1577458389899415
    num_agent_steps_sampled: 3638544
    num_agent_steps_trained: 3638544
    num_steps_sampled: 3638544
    num_steps_trained: 36385

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,364,56552.8,3638544,3.40785,10.94,-1.32,93.1215




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3648540
  custom_metrics: {}
  date: 2021-11-08_05-59-07
  done: false
  episode_len_mean: 94.25233644859813
  episode_media: {}
  episode_reward_max: 12.570000000000014
  episode_reward_mean: 3.5583177570093545
  episode_reward_min: -1.6500000000000008
  episodes_this_iter: 107
  episodes_total: 39800
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2107077786046214
          entropy_coeff: 0.01
          kl: 0.012700260202466247
          policy_loss: -0.07040331567295341
          total_loss: 0.0737897862552896
          vf_explained_var: 0.9267804622650146
          vf_loss: 0.13736739804586157
    num_agent_steps_sampled: 3648540
    num_agent_steps_trained: 3648540
    num_steps_sampled: 3648540
    num_steps_trained: 36

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,365,56724.4,3648540,3.55832,12.57,-1.65,94.2523




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3658536
  custom_metrics: {}
  date: 2021-11-08_06-02-07
  done: false
  episode_len_mean: 92.67592592592592
  episode_media: {}
  episode_reward_max: 10.540000000000015
  episode_reward_mean: 3.495740740740749
  episode_reward_min: -2.079999999999999
  episodes_this_iter: 108
  episodes_total: 39908
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.220704848745949
          entropy_coeff: 0.01
          kl: 0.014166747174139294
          policy_loss: -0.06679148473737077
          total_loss: 0.08825509241646809
          vf_explained_var: 0.9356070756912231
          vf_loss: 0.14498000379460743
    num_agent_steps_sampled: 3658536
    num_agent_steps_trained: 3658536
    num_steps_sampled: 3658536
    num_steps_trained: 3658

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,366,56904.4,3658536,3.49574,10.54,-2.08,92.6759




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3668532
  custom_metrics: {}
  date: 2021-11-08_06-04-35
  done: false
  episode_len_mean: 94.78846153846153
  episode_media: {}
  episode_reward_max: 12.730000000000013
  episode_reward_mean: 3.471442307692317
  episode_reward_min: -1.3400000000000005
  episodes_this_iter: 104
  episodes_total: 40012
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.234641471479693
          entropy_coeff: 0.01
          kl: 0.01413010730973793
          policy_loss: -0.06767548601755984
          total_loss: 0.09966858668953307
          vf_explained_var: 0.9319909811019897
          vf_loss: 0.15750033546270978
    num_agent_steps_sampled: 3668532
    num_agent_steps_trained: 3668532
    num_steps_sampled: 3668532
    num_steps_trained: 3668

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,367,57052.4,3668532,3.47144,12.73,-1.34,94.7885


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3678528
  custom_metrics: {}
  date: 2021-11-08_06-06-46
  done: false
  episode_len_mean: 98.80392156862744
  episode_media: {}
  episode_reward_max: 16.619999999999948
  episode_reward_mean: 3.884313725490206
  episode_reward_min: -1.6300000000000008
  episodes_this_iter: 102
  episodes_total: 40114
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.226301493705847
          entropy_coeff: 0.01
          kl: 0.015022534574558176
          policy_loss: -0.06454387955272045
          total_loss: 0.1415173411090723
          vf_explained_var: 0.9209212064743042
          vf_loss: 0.19410102202310267
    num_agent_steps_sampled: 3678528
    num_agent_steps_trained: 3678528
    num_steps_sampled: 3678528
    num_steps_trained: 3678

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,368,57183.5,3678528,3.88431,16.62,-1.63,98.8039




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3688524
  custom_metrics: {}
  date: 2021-11-08_06-09-29
  done: false
  episode_len_mean: 93.75700934579439
  episode_media: {}
  episode_reward_max: 12.580000000000018
  episode_reward_mean: 3.606542056074776
  episode_reward_min: -1.5000000000000007
  episodes_this_iter: 107
  episodes_total: 40221
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2362976432865502
          entropy_coeff: 0.01
          kl: 0.012622544179343003
          policy_loss: -0.06828990484245567
          total_loss: 0.07848822119781095
          vf_explained_var: 0.9293187856674194
          vf_loss: 0.14038536829284878
    num_agent_steps_sampled: 3688524
    num_agent_steps_trained: 3688524
    num_steps_sampled: 3688524
    num_steps_trained: 36

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,369,57346.4,3688524,3.60654,12.58,-1.5,93.757


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3698520
  custom_metrics: {}
  date: 2021-11-08_06-11-39
  done: false
  episode_len_mean: 98.56862745098039
  episode_media: {}
  episode_reward_max: 12.370000000000019
  episode_reward_mean: 4.05000000000001
  episode_reward_min: -1.5000000000000009
  episodes_this_iter: 102
  episodes_total: 40323
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.238207125256204
          entropy_coeff: 0.01
          kl: 0.012677900531704018
          policy_loss: -0.07205587157135845
          total_loss: 0.07745514533872533
          vf_explained_var: 0.9374738931655884
          vf_loss: 0.143011244544043
    num_agent_steps_sampled: 3698520
    num_agent_steps_trained: 3698520
    num_steps_sampled: 3698520
    num_steps_trained: 369852

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,370,57476.5,3698520,4.05,12.37,-1.5,98.5686




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3708516
  custom_metrics: {}
  date: 2021-11-08_06-14-05
  done: false
  episode_len_mean: 97.38613861386139
  episode_media: {}
  episode_reward_max: 10.820000000000016
  episode_reward_mean: 3.8142574257425834
  episode_reward_min: -1.9000000000000012
  episodes_this_iter: 101
  episodes_total: 40424
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2394329064931626
          entropy_coeff: 0.01
          kl: 0.013888254862634106
          policy_loss: -0.0664838989193623
          total_loss: 0.12576477407501677
          vf_explained_var: 0.919617235660553
          vf_loss: 0.1830038193629211
    num_agent_steps_sampled: 3708516
    num_agent_steps_trained: 3708516
    num_steps_sampled: 3708516
    num_steps_trained: 3708

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,371,57621.8,3708516,3.81426,10.82,-1.9,97.3861


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3718512
  custom_metrics: {}
  date: 2021-11-08_06-16-17
  done: false
  episode_len_mean: 97.9126213592233
  episode_media: {}
  episode_reward_max: 12.450000000000015
  episode_reward_mean: 3.824271844660204
  episode_reward_min: -1.6500000000000008
  episodes_this_iter: 103
  episodes_total: 40527
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.247414851392436
          entropy_coeff: 0.01
          kl: 0.013623647688335886
          policy_loss: -0.06997730423752059
          total_loss: 0.08947391278850726
          vf_explained_var: 0.9304954409599304
          vf_loss: 0.150888991354304
    num_agent_steps_sampled: 3718512
    num_agent_steps_trained: 3718512
    num_steps_sampled: 3718512
    num_steps_trained: 371851

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,372,57753.9,3718512,3.82427,12.45,-1.65,97.9126




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3728508
  custom_metrics: {}
  date: 2021-11-08_06-18-41
  done: false
  episode_len_mean: 96.81553398058253
  episode_media: {}
  episode_reward_max: 10.470000000000017
  episode_reward_mean: 3.763203883495155
  episode_reward_min: -1.4100000000000006
  episodes_this_iter: 103
  episodes_total: 40630
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2489467230617493
          entropy_coeff: 0.01
          kl: 0.013078006388281948
          policy_loss: -0.0702554737830646
          total_loss: 0.09771888824418569
          vf_explained_var: 0.893979012966156
          vf_loss: 0.16067049610945913
    num_agent_steps_sampled: 3728508
    num_agent_steps_trained: 3728508
    num_steps_sampled: 3728508
    num_steps_trained: 3728

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,373,57898,3728508,3.7632,10.47,-1.41,96.8155




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3738504
  custom_metrics: {}
  date: 2021-11-08_06-21-19
  done: false
  episode_len_mean: 94.05660377358491
  episode_media: {}
  episode_reward_max: 11.000000000000014
  episode_reward_mean: 3.7894339622641606
  episode_reward_min: -0.9400000000000005
  episodes_this_iter: 106
  episodes_total: 40736
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2324062679567906
          entropy_coeff: 0.01
          kl: 0.012721285991649428
          policy_loss: -0.07082268186072763
          total_loss: 0.08105720846165512
          vf_explained_var: 0.934346616268158
          vf_loss: 0.14522327163025864
    num_agent_steps_sampled: 3738504
    num_agent_steps_trained: 3738504
    num_steps_sampled: 3738504
    num_steps_trained: 37

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,374,58055.8,3738504,3.78943,11,-0.94,94.0566




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3748500
  custom_metrics: {}
  date: 2021-11-08_06-23-46
  done: false
  episode_len_mean: 96.625
  episode_media: {}
  episode_reward_max: 11.16000000000001
  episode_reward_mean: 3.7062500000000105
  episode_reward_min: -1.4800000000000004
  episodes_this_iter: 104
  episodes_total: 40840
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2564092515880225
          entropy_coeff: 0.01
          kl: 0.012565801188769788
          policy_loss: -0.06968104060197998
          total_loss: 0.07381220032166466
          vf_explained_var: 0.9363589882850647
          vf_loss: 0.13743086681725122
    num_agent_steps_sampled: 3748500
    num_agent_steps_trained: 3748500
    num_steps_sampled: 3748500
    num_steps_trained: 3748500
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,375,58203.3,3748500,3.70625,11.16,-1.48,96.625


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3758496
  custom_metrics: {}
  date: 2021-11-08_06-26-03
  done: false
  episode_len_mean: 96.17307692307692
  episode_media: {}
  episode_reward_max: 10.590000000000016
  episode_reward_mean: 3.7372115384615485
  episode_reward_min: -1.850000000000001
  episodes_this_iter: 104
  episodes_total: 40944
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2583661501224226
          entropy_coeff: 0.01
          kl: 0.013789447921124715
          policy_loss: -0.06912503890438466
          total_loss: 0.08712639709950512
          vf_explained_var: 0.930955171585083
          vf_loss: 0.1474210114703856
    num_agent_steps_sampled: 3758496
    num_agent_steps_trained: 3758496
    num_steps_sampled: 3758496
    num_steps_trained: 3758

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,376,58340.2,3758496,3.73721,10.59,-1.85,96.1731


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3768492
  custom_metrics: {}
  date: 2021-11-08_06-28-21
  done: false
  episode_len_mean: 95.54285714285714
  episode_media: {}
  episode_reward_max: 12.920000000000016
  episode_reward_mean: 4.296952380952391
  episode_reward_min: -1.6900000000000008
  episodes_this_iter: 105
  episodes_total: 41049
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2538351578590197
          entropy_coeff: 0.01
          kl: 0.01513491722292738
          policy_loss: -0.06709078225092246
          total_loss: 0.11566301948653582
          vf_explained_var: 0.9343904256820679
          vf_loss: 0.1708129190147305
    num_agent_steps_sampled: 3768492
    num_agent_steps_trained: 3768492
    num_steps_sampled: 3768492
    num_steps_trained: 3768

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,377,58477.8,3768492,4.29695,12.92,-1.69,95.5429




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3778488
  custom_metrics: {}
  date: 2021-11-08_06-31-26
  done: false
  episode_len_mean: 92.48598130841121
  episode_media: {}
  episode_reward_max: 18.97
  episode_reward_mean: 4.012803738317767
  episode_reward_min: -2.559999999999998
  episodes_this_iter: 107
  episodes_total: 41156
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2337244692011775
          entropy_coeff: 0.01
          kl: 0.013768713760788283
          policy_loss: -0.06714112068502567
          total_loss: 0.08787266410982762
          vf_explained_var: 0.9472403526306152
          vf_loss: 0.14598417822112386
    num_agent_steps_sampled: 3778488
    num_agent_steps_trained: 3778488
    num_steps_sampled: 3778488
    num_steps_trained: 3778488
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,378,58663.1,3778488,4.0128,18.97,-2.56,92.486




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3788484
  custom_metrics: {}
  date: 2021-11-08_06-33-55
  done: false
  episode_len_mean: 97.01941747572816
  episode_media: {}
  episode_reward_max: 10.830000000000016
  episode_reward_mean: 3.3467961165048634
  episode_reward_min: -1.3400000000000007
  episodes_this_iter: 103
  episodes_total: 41259
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.256286431173993
          entropy_coeff: 0.01
          kl: 0.012408910666304415
          policy_loss: -0.07150985047889825
          total_loss: 0.04905219681115232
          vf_explained_var: 0.9375060796737671
          vf_loss: 0.11485586146959383
    num_agent_steps_sampled: 3788484
    num_agent_steps_trained: 3788484
    num_steps_sampled: 3788484
    num_steps_trained: 37

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,379,58812,3788484,3.3468,10.83,-1.34,97.0194




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3798480
  custom_metrics: {}
  date: 2021-11-08_06-36-39
  done: false
  episode_len_mean: 95.47169811320755
  episode_media: {}
  episode_reward_max: 12.810000000000015
  episode_reward_mean: 4.195849056603784
  episode_reward_min: -1.4000000000000006
  episodes_this_iter: 106
  episodes_total: 41365
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.250670980795836
          entropy_coeff: 0.01
          kl: 0.013094687487013269
          policy_loss: -0.06866807002956286
          total_loss: 0.08699897091167096
          vf_explained_var: 0.9219944477081299
          vf_loss: 0.14834241415891383
    num_agent_steps_sampled: 3798480
    num_agent_steps_trained: 3798480
    num_steps_sampled: 3798480
    num_steps_trained: 379

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,380,58976,3798480,4.19585,12.81,-1.4,95.4717




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3808476
  custom_metrics: {}
  date: 2021-11-08_06-39-22
  done: false
  episode_len_mean: 95.82692307692308
  episode_media: {}
  episode_reward_max: 12.640000000000017
  episode_reward_mean: 3.6253846153846245
  episode_reward_min: -1.9900000000000009
  episodes_this_iter: 104
  episodes_total: 41469
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.227399496543102
          entropy_coeff: 0.01
          kl: 0.012922092405845879
          policy_loss: -0.07036707929343494
          total_loss: 0.07047558340490756
          vf_explained_var: 0.932105541229248
          vf_loss: 0.13367851593523708
    num_agent_steps_sampled: 3808476
    num_agent_steps_trained: 3808476
    num_steps_sampled: 3808476
    num_steps_trained: 380

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,381,59139,3808476,3.62538,12.64,-1.99,95.8269




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3818472
  custom_metrics: {}
  date: 2021-11-08_06-41-47
  done: false
  episode_len_mean: 94.8952380952381
  episode_media: {}
  episode_reward_max: 14.800000000000011
  episode_reward_mean: 4.115047619047628
  episode_reward_min: -1.820000000000001
  episodes_this_iter: 105
  episodes_total: 41574
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.211767719545935
          entropy_coeff: 0.01
          kl: 0.014122279062020753
          policy_loss: -0.06946911651832172
          total_loss: 0.09791905064549711
          vf_explained_var: 0.939019501209259
          vf_loss: 0.15733352485630248
    num_agent_steps_sampled: 3818472
    num_agent_steps_trained: 3818472
    num_steps_sampled: 3818472
    num_steps_trained: 381847

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,382,59284,3818472,4.11505,14.8,-1.82,94.8952




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3828468
  custom_metrics: {}
  date: 2021-11-08_06-44-51
  done: false
  episode_len_mean: 93.4766355140187
  episode_media: {}
  episode_reward_max: 12.490000000000013
  episode_reward_mean: 3.8120560747663648
  episode_reward_min: -1.880000000000001
  episodes_this_iter: 107
  episodes_total: 41681
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.219113216848455
          entropy_coeff: 0.01
          kl: 0.01424862148572979
          policy_loss: -0.064075380938653
          total_loss: 0.12426802726207763
          vf_explained_var: 0.9313440918922424
          vf_loss: 0.17807439875462625
    num_agent_steps_sampled: 3828468
    num_agent_steps_trained: 3828468
    num_steps_sampled: 3828468
    num_steps_trained: 3828468

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,383,59467.9,3828468,3.81206,12.49,-1.88,93.4766




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3838464
  custom_metrics: {}
  date: 2021-11-08_06-47-24
  done: false
  episode_len_mean: 97.22330097087378
  episode_media: {}
  episode_reward_max: 9.05000000000001
  episode_reward_mean: 3.665825242718458
  episode_reward_min: -1.6300000000000008
  episodes_this_iter: 103
  episodes_total: 41784
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.248081186082628
          entropy_coeff: 0.01
          kl: 0.012927719041634495
          policy_loss: -0.07048216066920222
          total_loss: 0.08881303899738396
          vf_explained_var: 0.9237533211708069
          vf_loss: 0.15232505227924667
    num_agent_steps_sampled: 3838464
    num_agent_steps_trained: 3838464
    num_steps_sampled: 3838464
    num_steps_trained: 38384

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,384,59620.3,3838464,3.66583,9.05,-1.63,97.2233




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3848460
  custom_metrics: {}
  date: 2021-11-08_06-50-18
  done: false
  episode_len_mean: 94.90384615384616
  episode_media: {}
  episode_reward_max: 10.920000000000009
  episode_reward_mean: 3.9915384615384712
  episode_reward_min: -1.4300000000000006
  episodes_this_iter: 104
  episodes_total: 41888
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2160945737463797
          entropy_coeff: 0.01
          kl: 0.012994820926729385
          policy_loss: -0.06954847783900989
          total_loss: 0.07918810572984636
          vf_explained_var: 0.9276885986328125
          vf_loss: 0.1412937019044199
    num_agent_steps_sampled: 3848460
    num_agent_steps_trained: 3848460
    num_steps_sampled: 3848460
    num_steps_trained: 38

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,385,59794.1,3848460,3.99154,10.92,-1.43,94.9038




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3858456
  custom_metrics: {}
  date: 2021-11-08_06-53-21
  done: false
  episode_len_mean: 96.25
  episode_media: {}
  episode_reward_max: 10.590000000000016
  episode_reward_mean: 3.481153846153856
  episode_reward_min: -1.500000000000001
  episodes_this_iter: 104
  episodes_total: 41992
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2657763790880514
          entropy_coeff: 0.01
          kl: 0.01235143307680307
          policy_loss: -0.07325817870023923
          total_loss: 0.05797068032189312
          vf_explained_var: 0.9346731901168823
          vf_loss: 0.12574851361668518
    num_agent_steps_sampled: 3858456
    num_agent_steps_trained: 3858456
    num_steps_sampled: 3858456
    num_steps_trained: 3858456
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,386,59977.2,3858456,3.48115,10.59,-1.5,96.25




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3868452
  custom_metrics: {}
  date: 2021-11-08_06-55-46
  done: false
  episode_len_mean: 96.99038461538461
  episode_media: {}
  episode_reward_max: 10.980000000000018
  episode_reward_mean: 3.7560576923077016
  episode_reward_min: -2.000000000000001
  episodes_this_iter: 104
  episodes_total: 42096
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.229100269639594
          entropy_coeff: 0.01
          kl: 0.014521874604028616
          policy_loss: -0.06723871528147123
          total_loss: 0.0929638184622949
          vf_explained_var: 0.9363389611244202
          vf_loss: 0.14941089062227142
    num_agent_steps_sampled: 3868452
    num_agent_steps_trained: 3868452
    num_steps_sampled: 3868452
    num_steps_trained: 3868

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,387,60122.3,3868452,3.75606,10.98,-2,96.9904




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3878448
  custom_metrics: {}
  date: 2021-11-08_06-58-16
  done: false
  episode_len_mean: 94.40566037735849
  episode_media: {}
  episode_reward_max: 10.820000000000014
  episode_reward_mean: 3.1568867924528377
  episode_reward_min: -2.0399999999999996
  episodes_this_iter: 106
  episodes_total: 42202
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2436540733035812
          entropy_coeff: 0.01
          kl: 0.012928781149853642
          policy_loss: -0.06627599454174439
          total_loss: 0.08543121557619072
          vf_explained_var: 0.9216033220291138
          vf_loss: 0.14469037048009217
    num_agent_steps_sampled: 3878448
    num_agent_steps_trained: 3878448
    num_steps_sampled: 3878448
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,388,60272.9,3878448,3.15689,10.82,-2.04,94.4057




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3888444
  custom_metrics: {}
  date: 2021-11-08_07-00-46
  done: false
  episode_len_mean: 94.13333333333334
  episode_media: {}
  episode_reward_max: 11.860000000000015
  episode_reward_mean: 3.6407619047619137
  episode_reward_min: -1.3700000000000006
  episodes_this_iter: 105
  episodes_total: 42307
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.212661032187633
          entropy_coeff: 0.01
          kl: 0.01379081725615208
          policy_loss: -0.06785711103843318
          total_loss: 0.0983018597910324
          vf_explained_var: 0.923886775970459
          vf_loss: 0.15686837566745843
    num_agent_steps_sampled: 3888444
    num_agent_steps_trained: 3888444
    num_steps_sampled: 3888444
    num_steps_trained: 38884

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,389,60422.1,3888444,3.64076,11.86,-1.37,94.1333




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3898440
  custom_metrics: {}
  date: 2021-11-08_07-03-49
  done: false
  episode_len_mean: 93.55140186915888
  episode_media: {}
  episode_reward_max: 14.480000000000015
  episode_reward_mean: 4.397009345794402
  episode_reward_min: -1.9500000000000013
  episodes_this_iter: 107
  episodes_total: 42414
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.196883244901641
          entropy_coeff: 0.01
          kl: 0.01430998918907244
          policy_loss: -0.06368057767613831
          total_loss: 0.11762873544954719
          vf_explained_var: 0.9359781742095947
          vf_loss: 0.17067819939544185
    num_agent_steps_sampled: 3898440
    num_agent_steps_trained: 3898440
    num_steps_sampled: 3898440
    num_steps_trained: 3898

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,390,60605.3,3898440,4.39701,14.48,-1.95,93.5514




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3908436
  custom_metrics: {}
  date: 2021-11-08_07-06-17
  done: false
  episode_len_mean: 95.01886792452831
  episode_media: {}
  episode_reward_max: 12.730000000000013
  episode_reward_mean: 3.6762264150943476
  episode_reward_min: -1.9699999999999993
  episodes_this_iter: 106
  episodes_total: 42520
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2476426330387085
          entropy_coeff: 0.01
          kl: 0.013949306849474182
          policy_loss: -0.07045767569484619
          total_loss: 0.11838753355953556
          vf_explained_var: 0.9221041202545166
          vf_loss: 0.17954337022816524
    num_agent_steps_sampled: 3908436
    num_agent_steps_trained: 3908436
    num_steps_sampled: 3908436
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,391,60753.1,3908436,3.67623,12.73,-1.97,95.0189




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3918432
  custom_metrics: {}
  date: 2021-11-08_07-08-47
  done: false
  episode_len_mean: 94.23584905660377
  episode_media: {}
  episode_reward_max: 10.820000000000011
  episode_reward_mean: 3.619339622641518
  episode_reward_min: -1.9000000000000008
  episodes_this_iter: 106
  episodes_total: 42626
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.23437088493608
          entropy_coeff: 0.01
          kl: 0.013125569054304705
          policy_loss: -0.06602280397827809
          total_loss: 0.11197909401236182
          vf_explained_var: 0.9152719378471375
          vf_loss: 0.17044391862602315
    num_agent_steps_sampled: 3918432
    num_agent_steps_trained: 3918432
    num_steps_sampled: 3918432
    num_steps_trained: 3918

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,392,60903.6,3918432,3.61934,10.82,-1.9,94.2358




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3928428
  custom_metrics: {}
  date: 2021-11-08_07-11-19
  done: false
  episode_len_mean: 93.93396226415095
  episode_media: {}
  episode_reward_max: 10.94000000000001
  episode_reward_mean: 3.925471698113217
  episode_reward_min: -1.2600000000000005
  episodes_this_iter: 106
  episodes_total: 42732
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2283788717710054
          entropy_coeff: 0.01
          kl: 0.01415129706827545
          policy_loss: -0.06742266878390159
          total_loss: 0.1145417887940366
          vf_explained_var: 0.9380373954772949
          vf_loss: 0.1720098214956303
    num_agent_steps_sampled: 3928428
    num_agent_steps_trained: 3928428
    num_steps_sampled: 3928428
    num_steps_trained: 392842

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,393,61055,3928428,3.92547,10.94,-1.26,93.934


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3938424
  custom_metrics: {}
  date: 2021-11-08_07-13-38
  done: false
  episode_len_mean: 93.38888888888889
  episode_media: {}
  episode_reward_max: 10.820000000000014
  episode_reward_mean: 3.101759259259267
  episode_reward_min: -1.6100000000000008
  episodes_this_iter: 108
  episodes_total: 42840
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2534369236383682
          entropy_coeff: 0.01
          kl: 0.011992995901379202
          policy_loss: -0.0708500837477354
          total_loss: 0.05730162461917115
          vf_explained_var: 0.9329063892364502
          vf_loss: 0.12336453301115678
    num_agent_steps_sampled: 3938424
    num_agent_steps_trained: 3938424
    num_steps_sampled: 3938424
    num_steps_trained: 393

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,394,61193.7,3938424,3.10176,10.82,-1.61,93.3889




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3948420
  custom_metrics: {}
  date: 2021-11-08_07-16-16
  done: false
  episode_len_mean: 90.06363636363636
  episode_media: {}
  episode_reward_max: 14.820000000000014
  episode_reward_mean: 3.4374545454545538
  episode_reward_min: -1.6000000000000008
  episodes_this_iter: 110
  episodes_total: 42950
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2157852096435353
          entropy_coeff: 0.01
          kl: 0.013116380780730673
          policy_loss: -0.06800684282699457
          total_loss: 0.0974797846064863
          vf_explained_var: 0.9329434633255005
          vf_loss: 0.15776372364539112
    num_agent_steps_sampled: 3948420
    num_agent_steps_trained: 3948420
    num_steps_sampled: 3948420
    num_steps_trained: 39

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,395,61352,3948420,3.43745,14.82,-1.6,90.0636




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3958416
  custom_metrics: {}
  date: 2021-11-08_07-19-01
  done: false
  episode_len_mean: 92.03636363636363
  episode_media: {}
  episode_reward_max: 12.64000000000001
  episode_reward_mean: 4.101636363636373
  episode_reward_min: -1.5100000000000007
  episodes_this_iter: 110
  episodes_total: 43060
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.208857988088559
          entropy_coeff: 0.01
          kl: 0.014001514407959633
          policy_loss: -0.06969778502566946
          total_loss: 0.1174898368298498
          vf_explained_var: 0.9274449348449707
          vf_loss: 0.17737900058890127
    num_agent_steps_sampled: 3958416
    num_agent_steps_trained: 3958416
    num_steps_sampled: 3958416
    num_steps_trained: 39584

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,396,61516.7,3958416,4.10164,12.64,-1.51,92.0364




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3968412
  custom_metrics: {}
  date: 2021-11-08_07-21-32
  done: false
  episode_len_mean: 93.27102803738318
  episode_media: {}
  episode_reward_max: 12.870000000000015
  episode_reward_mean: 3.508224299065429
  episode_reward_min: -1.5500000000000007
  episodes_this_iter: 107
  episodes_total: 43167
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.20878486918588
          entropy_coeff: 0.01
          kl: 0.013215014294412672
          policy_loss: -0.06458808361019334
          total_loss: 0.1020493204295476
          vf_explained_var: 0.9300735592842102
          vf_loss: 0.15861979875013105
    num_agent_steps_sampled: 3968412
    num_agent_steps_trained: 3968412
    num_steps_sampled: 3968412
    num_steps_trained: 39684

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,397,61667.9,3968412,3.50822,12.87,-1.55,93.271


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3978408
  custom_metrics: {}
  date: 2021-11-08_07-23-52
  done: false
  episode_len_mean: 93.0754716981132
  episode_media: {}
  episode_reward_max: 12.980000000000013
  episode_reward_mean: 3.7286792452830277
  episode_reward_min: -1.269999999999993
  episodes_this_iter: 106
  episodes_total: 43273
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1982595331648476
          entropy_coeff: 0.01
          kl: 0.012591814792039423
          policy_loss: -0.07132035033761436
          total_loss: 0.07406749107007288
          vf_explained_var: 0.9333450198173523
          vf_loss: 0.13868470801215652
    num_agent_steps_sampled: 3978408
    num_agent_steps_trained: 3978408
    num_steps_sampled: 3978408
    num_steps_trained: 397

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,398,61807.9,3978408,3.72868,12.98,-1.27,93.0755




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3988404
  custom_metrics: {}
  date: 2021-11-08_07-26-31
  done: false
  episode_len_mean: 90.85454545454546
  episode_media: {}
  episode_reward_max: 12.950000000000014
  episode_reward_mean: 4.20081818181819
  episode_reward_min: -1.5900000000000007
  episodes_this_iter: 110
  episodes_total: 43383
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2034991076868824
          entropy_coeff: 0.01
          kl: 0.014028993280339836
          policy_loss: -0.07103951104208191
          total_loss: 0.07948943059732262
          vf_explained_var: 0.9440323114395142
          vf_loss: 0.1406041318264145
    num_agent_steps_sampled: 3988404
    num_agent_steps_trained: 3988404
    num_steps_sampled: 3988404
    num_steps_trained: 3988

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,399,61967.1,3988404,4.20082,12.95,-1.59,90.8545




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 3998400
  custom_metrics: {}
  date: 2021-11-08_07-29-18
  done: false
  episode_len_mean: 90.99090909090908
  episode_media: {}
  episode_reward_max: 10.800000000000017
  episode_reward_mean: 3.3724545454545547
  episode_reward_min: -2.5899999999999985
  episodes_this_iter: 110
  episodes_total: 43493
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2218818134731717
          entropy_coeff: 0.01
          kl: 0.01363299596966418
          policy_loss: -0.0664840464758822
          total_loss: 0.10241083956013124
          vf_explained_var: 0.9242337942123413
          vf_loss: 0.16005603519196693
    num_agent_steps_sampled: 3998400
    num_agent_steps_trained: 3998400
    num_steps_sampled: 3998400
    num_steps_trained: 399

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,400,62134,3998400,3.37245,10.8,-2.59,90.9909




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4008396
  custom_metrics: {}
  date: 2021-11-08_07-33-01
  done: false
  episode_len_mean: 88.78761061946902
  episode_media: {}
  episode_reward_max: 11.30000000000001
  episode_reward_mean: 3.7380530973451407
  episode_reward_min: -1.6000000000000005
  episodes_this_iter: 113
  episodes_total: 43606
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1876680590148663
          entropy_coeff: 0.01
          kl: 0.012291638847195528
          policy_loss: -0.06935772543024812
          total_loss: 0.06601713622928175
          vf_explained_var: 0.9478688836097717
          vf_loss: 0.1292496522808941
    num_agent_steps_sampled: 4008396
    num_agent_steps_trained: 4008396
    num_steps_sampled: 4008396
    num_steps_trained: 400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,401,62356.5,4008396,3.73805,11.3,-1.6,88.7876




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4018392
  custom_metrics: {}
  date: 2021-11-08_07-35-49
  done: false
  episode_len_mean: 91.86238532110092
  episode_media: {}
  episode_reward_max: 14.770000000000016
  episode_reward_mean: 3.250825688073402
  episode_reward_min: -1.6400000000000008
  episodes_this_iter: 109
  episodes_total: 43715
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.221913212384933
          entropy_coeff: 0.01
          kl: 0.012750560753761794
          policy_loss: -0.06956644825255259
          total_loss: 0.06507340675442774
          vf_explained_var: 0.9347043633460999
          vf_loss: 0.12781161550655323
    num_agent_steps_sampled: 4018392
    num_agent_steps_trained: 4018392
    num_steps_sampled: 4018392
    num_steps_trained: 401

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,402,62525.2,4018392,3.25083,14.77,-1.64,91.8624




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4028388
  custom_metrics: {}
  date: 2021-11-08_07-38-59
  done: false
  episode_len_mean: 89.69369369369369
  episode_media: {}
  episode_reward_max: 14.830000000000016
  episode_reward_mean: 3.893153153153161
  episode_reward_min: -1.9500000000000008
  episodes_this_iter: 111
  episodes_total: 43826
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2079218385565995
          entropy_coeff: 0.01
          kl: 0.013855331953859936
          policy_loss: -0.06639200581285434
          total_loss: 0.09638425107472218
          vf_explained_var: 0.9353910088539124
          vf_loss: 0.15329129722797208
    num_agent_steps_sampled: 4028388
    num_agent_steps_trained: 4028388
    num_steps_sampled: 4028388
    num_steps_trained: 40

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,403,62714.4,4028388,3.89315,14.83,-1.95,89.6937




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4038384
  custom_metrics: {}
  date: 2021-11-08_07-41-55
  done: false
  episode_len_mean: 90.24107142857143
  episode_media: {}
  episode_reward_max: 12.680000000000017
  episode_reward_mean: 3.8155357142857227
  episode_reward_min: -1.4900000000000007
  episodes_this_iter: 112
  episodes_total: 43938
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1917909011881576
          entropy_coeff: 0.01
          kl: 0.012936848775514615
          policy_loss: -0.06542718691162319
          total_loss: 0.08786120169525409
          vf_explained_var: 0.9272052049636841
          vf_loss: 0.14573453832338126
    num_agent_steps_sampled: 4038384
    num_agent_steps_trained: 4038384
    num_steps_sampled: 4038384
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,404,62890.5,4038384,3.81554,12.68,-1.49,90.2411




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4048380
  custom_metrics: {}
  date: 2021-11-08_07-44-38
  done: false
  episode_len_mean: 90.57272727272728
  episode_media: {}
  episode_reward_max: 11.010000000000014
  episode_reward_mean: 4.008181818181828
  episode_reward_min: -1.5400000000000007
  episodes_this_iter: 110
  episodes_total: 44048
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1868989461507553
          entropy_coeff: 0.01
          kl: 0.014123312283008421
          policy_loss: -0.06589136902386179
          total_loss: 0.11136655296939306
          vf_explained_var: 0.9309718608856201
          vf_loss: 0.16695223877158685
    num_agent_steps_sampled: 4048380
    num_agent_steps_trained: 4048380
    num_steps_sampled: 4048380
    num_steps_trained: 40

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,405,63053.5,4048380,4.00818,11.01,-1.54,90.5727




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4058376
  custom_metrics: {}
  date: 2021-11-08_07-47-08
  done: false
  episode_len_mean: 93.15094339622641
  episode_media: {}
  episode_reward_max: 14.650000000000011
  episode_reward_mean: 3.8217924528301968
  episode_reward_min: -1.2500000000000004
  episodes_this_iter: 106
  episodes_total: 44154
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.232552194595337
          entropy_coeff: 0.01
          kl: 0.012784750380639932
          policy_loss: -0.06861677286493727
          total_loss: 0.08518904607711185
          vf_explained_var: 0.9308537244796753
          vf_loss: 0.14700608054000852
    num_agent_steps_sampled: 4058376
    num_agent_steps_trained: 4058376
    num_steps_sampled: 4058376
    num_steps_trained: 40

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,406,63203.7,4058376,3.82179,14.65,-1.25,93.1509




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4068372
  custom_metrics: {}
  date: 2021-11-08_07-49-55
  done: false
  episode_len_mean: 93.8785046728972
  episode_media: {}
  episode_reward_max: 9.710000000000004
  episode_reward_mean: 3.465981308411223
  episode_reward_min: -1.6900000000000006
  episodes_this_iter: 107
  episodes_total: 44261
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.202144443275582
          entropy_coeff: 0.01
          kl: 0.012450788204305833
          policy_loss: -0.0709234733086748
          total_loss: 0.06766416175752624
          vf_explained_var: 0.9314371943473816
          vf_loss: 0.13224462740059592
    num_agent_steps_sampled: 4068372
    num_agent_steps_trained: 4068372
    num_steps_sampled: 4068372
    num_steps_trained: 406837

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,407,63370.7,4068372,3.46598,9.71,-1.69,93.8785




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4078368
  custom_metrics: {}
  date: 2021-11-08_07-52-49
  done: false
  episode_len_mean: 91.68807339449542
  episode_media: {}
  episode_reward_max: 11.090000000000014
  episode_reward_mean: 3.8311009174312014
  episode_reward_min: -1.5600000000000007
  episodes_this_iter: 109
  episodes_total: 44370
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1916560302432786
          entropy_coeff: 0.01
          kl: 0.013526449872505035
          policy_loss: -0.0699057763784678
          total_loss: 0.08845773365579418
          vf_explained_var: 0.9385538697242737
          vf_loss: 0.1494651260292237
    num_agent_steps_sampled: 4078368
    num_agent_steps_trained: 4078368
    num_steps_sampled: 4078368
    num_steps_trained: 407

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,408,63544.7,4078368,3.8311,11.09,-1.56,91.6881




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4088364
  custom_metrics: {}
  date: 2021-11-08_07-55-21
  done: false
  episode_len_mean: 92.66055045871559
  episode_media: {}
  episode_reward_max: 14.86000000000001
  episode_reward_mean: 4.1094495412844125
  episode_reward_min: -1.3400000000000005
  episodes_this_iter: 109
  episodes_total: 44479
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1882740583175266
          entropy_coeff: 0.01
          kl: 0.01307853468379205
          policy_loss: -0.06898505410864057
          total_loss: 0.08749736473282688
          vf_explained_var: 0.942684531211853
          vf_loss: 0.148570620526488
    num_agent_steps_sampled: 4088364
    num_agent_steps_trained: 4088364
    num_steps_sampled: 4088364
    num_steps_trained: 408836

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,409,63697,4088364,4.10945,14.86,-1.34,92.6606




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4098360
  custom_metrics: {}
  date: 2021-11-08_07-57-59
  done: false
  episode_len_mean: 94.28571428571429
  episode_media: {}
  episode_reward_max: 10.750000000000014
  episode_reward_mean: 3.713333333333342
  episode_reward_min: -1.5300000000000007
  episodes_this_iter: 105
  episodes_total: 44584
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.198503725956648
          entropy_coeff: 0.01
          kl: 0.013424545706457854
          policy_loss: -0.06806675498174806
          total_loss: 0.07748502867582899
          vf_explained_var: 0.9369170069694519
          vf_loss: 0.1369540260404221
    num_agent_steps_sampled: 4098360
    num_agent_steps_trained: 4098360
    num_steps_sampled: 4098360
    num_steps_trained: 4098

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,410,63854.9,4098360,3.71333,10.75,-1.53,94.2857




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4108356
  custom_metrics: {}
  date: 2021-11-08_08-00-49
  done: false
  episode_len_mean: 93.7196261682243
  episode_media: {}
  episode_reward_max: 12.040000000000015
  episode_reward_mean: 3.611588785046739
  episode_reward_min: -1.4900000000000007
  episodes_this_iter: 107
  episodes_total: 44691
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.194233762504708
          entropy_coeff: 0.01
          kl: 0.013665323156686084
          policy_loss: -0.06517342779084913
          total_loss: 0.10240440792205115
          vf_explained_var: 0.9161876440048218
          vf_loss: 0.15838885811818207
    num_agent_steps_sampled: 4108356
    num_agent_steps_trained: 4108356
    num_steps_sampled: 4108356
    num_steps_trained: 4108

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,411,64024.7,4108356,3.61159,12.04,-1.49,93.7196




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4118352
  custom_metrics: {}
  date: 2021-11-08_08-03-30
  done: false
  episode_len_mean: 93.59433962264151
  episode_media: {}
  episode_reward_max: 10.720000000000017
  episode_reward_mean: 3.5166037735849143
  episode_reward_min: -2.2899999999999947
  episodes_this_iter: 106
  episodes_total: 44797
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1956536144272896
          entropy_coeff: 0.01
          kl: 0.013307960121577466
          policy_loss: -0.07015460631372328
          total_loss: 0.07269108425825835
          vf_explained_var: 0.9313903450965881
          vf_loss: 0.13448502980013433
    num_agent_steps_sampled: 4118352
    num_agent_steps_trained: 4118352
    num_steps_sampled: 4118352
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,412,64185.3,4118352,3.5166,10.72,-2.29,93.5943




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4128348
  custom_metrics: {}
  date: 2021-11-08_08-06-02
  done: false
  episode_len_mean: 92.9074074074074
  episode_media: {}
  episode_reward_max: 12.75000000000001
  episode_reward_mean: 3.976388888888899
  episode_reward_min: -1.5800000000000007
  episodes_this_iter: 108
  episodes_total: 44905
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1907410770399958
          entropy_coeff: 0.01
          kl: 0.013353501079196495
          policy_loss: -0.06639163381555396
          total_loss: 0.07844294292502041
          vf_explained_var: 0.9420851469039917
          vf_loss: 0.13632104246375654
    num_agent_steps_sampled: 4128348
    num_agent_steps_trained: 4128348
    num_steps_sampled: 4128348
    num_steps_trained: 4128

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,413,64337.8,4128348,3.97639,12.75,-1.58,92.9074




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4138344
  custom_metrics: {}
  date: 2021-11-08_08-08-33
  done: false
  episode_len_mean: 94.05607476635514
  episode_media: {}
  episode_reward_max: 12.940000000000012
  episode_reward_mean: 3.646915887850477
  episode_reward_min: -1.3100000000000005
  episodes_this_iter: 107
  episodes_total: 45012
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2084657475479648
          entropy_coeff: 0.01
          kl: 0.013570634059755216
          policy_loss: -0.06465512636300717
          total_loss: 0.09137675691173118
          vf_explained_var: 0.9383723139762878
          vf_loss: 0.14720094071573808
    num_agent_steps_sampled: 4138344
    num_agent_steps_trained: 4138344
    num_steps_sampled: 4138344
    num_steps_trained: 41

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,414,64488,4138344,3.64692,12.94,-1.31,94.0561


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4148340
  custom_metrics: {}
  date: 2021-11-08_08-10-53
  done: false
  episode_len_mean: 92.42592592592592
  episode_media: {}
  episode_reward_max: 14.360000000000017
  episode_reward_mean: 3.5975000000000077
  episode_reward_min: -1.6700000000000008
  episodes_this_iter: 108
  episodes_total: 45120
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1871672168756144
          entropy_coeff: 0.01
          kl: 0.012946654925794788
          policy_loss: -0.06790018582509624
          total_loss: 0.09808046225394704
          vf_explained_var: 0.9238619208335876
          vf_loss: 0.15835822000462785
    num_agent_steps_sampled: 4148340
    num_agent_steps_trained: 4148340
    num_steps_sampled: 4148340
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,415,64628.1,4148340,3.5975,14.36,-1.67,92.4259




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4158336
  custom_metrics: {}
  date: 2021-11-08_08-13-26
  done: false
  episode_len_mean: 94.74528301886792
  episode_media: {}
  episode_reward_max: 13.000000000000014
  episode_reward_mean: 3.2172641509434046
  episode_reward_min: -1.6100000000000003
  episodes_this_iter: 106
  episodes_total: 45226
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1997144984383867
          entropy_coeff: 0.01
          kl: 0.013251135794876084
          policy_loss: -0.06919593505847912
          total_loss: 0.07824794278105991
          vf_explained_var: 0.9258736968040466
          vf_loss: 0.13925327802888857
    num_agent_steps_sampled: 4158336
    num_agent_steps_trained: 4158336
    num_steps_sampled: 4158336
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,416,64781.2,4158336,3.21726,13,-1.61,94.7453




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4168332
  custom_metrics: {}
  date: 2021-11-08_08-16-14
  done: false
  episode_len_mean: 91.25688073394495
  episode_media: {}
  episode_reward_max: 15.05000000000001
  episode_reward_mean: 3.8876146788990917
  episode_reward_min: -1.4300000000000008
  episodes_this_iter: 109
  episodes_total: 45335
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.199414829107431
          entropy_coeff: 0.01
          kl: 0.013460099118370037
          policy_loss: -0.07145873848189656
          total_loss: 0.08314912877021692
          vf_explained_var: 0.9354813694953918
          vf_loss: 0.14593822598202616
    num_agent_steps_sampled: 4168332
    num_agent_steps_trained: 4168332
    num_steps_sampled: 4168332
    num_steps_trained: 416

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,417,64948.7,4168332,3.88761,15.05,-1.43,91.2569




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4178328
  custom_metrics: {}
  date: 2021-11-08_08-18-58
  done: false
  episode_len_mean: 95.18867924528301
  episode_media: {}
  episode_reward_max: 12.710000000000012
  episode_reward_mean: 3.97745283018869
  episode_reward_min: -1.6700000000000008
  episodes_this_iter: 106
  episodes_total: 45441
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.190525872483213
          entropy_coeff: 0.01
          kl: 0.013485541751588245
          policy_loss: -0.06617835932371453
          total_loss: 0.09396520340735587
          vf_explained_var: 0.9334796071052551
          vf_loss: 0.15132707053731775
    num_agent_steps_sampled: 4178328
    num_agent_steps_trained: 4178328
    num_steps_sampled: 4178328
    num_steps_trained: 4178

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,418,65112.9,4178328,3.97745,12.71,-1.67,95.1887




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4188324
  custom_metrics: {}
  date: 2021-11-08_08-22-01
  done: false
  episode_len_mean: 91.36697247706422
  episode_media: {}
  episode_reward_max: 11.180000000000012
  episode_reward_mean: 3.976605504587165
  episode_reward_min: -1.8500000000000005
  episodes_this_iter: 109
  episodes_total: 45550
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1879957081925157
          entropy_coeff: 0.01
          kl: 0.013948500864663486
          policy_loss: -0.06796718035052475
          total_loss: 0.08606065941226279
          vf_explained_var: 0.943539023399353
          vf_loss: 0.14413136748644786
    num_agent_steps_sampled: 4188324
    num_agent_steps_trained: 4188324
    num_steps_sampled: 4188324
    num_steps_trained: 418

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,419,65296.5,4188324,3.97661,11.18,-1.85,91.367




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4198320
  custom_metrics: {}
  date: 2021-11-08_08-24-40
  done: false
  episode_len_mean: 93.81132075471699
  episode_media: {}
  episode_reward_max: 12.790000000000013
  episode_reward_mean: 4.126792452830199
  episode_reward_min: -1.1900000000000004
  episodes_this_iter: 106
  episodes_total: 45656
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1586471838828847
          entropy_coeff: 0.01
          kl: 0.012925330623064253
          policy_loss: -0.0651422929432657
          total_loss: 0.06910212974772494
          vf_explained_var: 0.9559566974639893
          vf_loss: 0.12638537367153116
    num_agent_steps_sampled: 4198320
    num_agent_steps_trained: 4198320
    num_steps_sampled: 4198320
    num_steps_trained: 419

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,420,65455.1,4198320,4.12679,12.79,-1.19,93.8113




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4208316
  custom_metrics: {}
  date: 2021-11-08_08-27-10
  done: false
  episode_len_mean: 95.73076923076923
  episode_media: {}
  episode_reward_max: 10.570000000000018
  episode_reward_mean: 3.352307692307701
  episode_reward_min: -1.7100000000000009
  episodes_this_iter: 104
  episodes_total: 45760
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1915318433036153
          entropy_coeff: 0.01
          kl: 0.012852036380932142
          policy_loss: -0.07010253447657212
          total_loss: 0.06983439184836725
          vf_explained_var: 0.9370538592338562
          vf_loss: 0.1325736988479128
    num_agent_steps_sampled: 4208316
    num_agent_steps_trained: 4208316
    num_steps_sampled: 4208316
    num_steps_trained: 420

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,421,65604.5,4208316,3.35231,10.57,-1.71,95.7308




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4218312
  custom_metrics: {}
  date: 2021-11-08_08-29-54
  done: false
  episode_len_mean: 94.33018867924528
  episode_media: {}
  episode_reward_max: 12.580000000000016
  episode_reward_mean: 4.160849056603784
  episode_reward_min: -2.1399999999999997
  episodes_this_iter: 106
  episodes_total: 45866
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.150744915721763
          entropy_coeff: 0.01
          kl: 0.014163084036750534
          policy_loss: -0.06440481930258724
          total_loss: 0.11673047612222213
          vf_explained_var: 0.9324191808700562
          vf_loss: 0.1703774684896836
    num_agent_steps_sampled: 4218312
    num_agent_steps_trained: 4218312
    num_steps_sampled: 4218312
    num_steps_trained: 4218

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,422,65769.1,4218312,4.16085,12.58,-2.14,94.3302




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4228308
  custom_metrics: {}
  date: 2021-11-08_08-32-29
  done: false
  episode_len_mean: 92.63888888888889
  episode_media: {}
  episode_reward_max: 12.230000000000015
  episode_reward_mean: 3.583888888888898
  episode_reward_min: -1.4000000000000006
  episodes_this_iter: 108
  episodes_total: 45974
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.191594819941072
          entropy_coeff: 0.01
          kl: 0.013013488735916368
          policy_loss: -0.06511404713114294
          total_loss: 0.10745197597604532
          vf_explained_var: 0.9338178634643555
          vf_loss: 0.16483561669070368
    num_agent_steps_sampled: 4228308
    num_agent_steps_trained: 4228308
    num_steps_sampled: 4228308
    num_steps_trained: 422

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,423,65923.5,4228308,3.58389,12.23,-1.4,92.6389




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4238304
  custom_metrics: {}
  date: 2021-11-08_08-35-37
  done: false
  episode_len_mean: 92.78703703703704
  episode_media: {}
  episode_reward_max: 12.940000000000014
  episode_reward_mean: 4.0065740740740825
  episode_reward_min: -0.9200000000000004
  episodes_this_iter: 108
  episodes_total: 46082
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.193988230900887
          entropy_coeff: 0.01
          kl: 0.013536761462499032
          policy_loss: -0.06544336928134291
          total_loss: 0.09669108684532918
          vf_explained_var: 0.9377652406692505
          vf_loss: 0.15323590281236377
    num_agent_steps_sampled: 4238304
    num_agent_steps_trained: 4238304
    num_steps_sampled: 4238304
    num_steps_trained: 42

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,424,66111.8,4238304,4.00657,12.94,-0.92,92.787




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4248300
  custom_metrics: {}
  date: 2021-11-08_08-38-18
  done: false
  episode_len_mean: 92.67289719626169
  episode_media: {}
  episode_reward_max: 12.610000000000014
  episode_reward_mean: 3.7762616822429997
  episode_reward_min: -1.2400000000000004
  episodes_this_iter: 107
  episodes_total: 46189
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1627939925234543
          entropy_coeff: 0.01
          kl: 0.012694999339967313
          policy_loss: -0.0695162040523739
          total_loss: 0.07764866601707589
          vf_explained_var: 0.9401608109474182
          vf_loss: 0.13987201389530277
    num_agent_steps_sampled: 4248300
    num_agent_steps_trained: 4248300
    num_steps_sampled: 4248300
    num_steps_trained: 42

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,425,66273.3,4248300,3.77626,12.61,-1.24,92.6729




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4258296
  custom_metrics: {}
  date: 2021-11-08_08-41-02
  done: false
  episode_len_mean: 91.53636363636363
  episode_media: {}
  episode_reward_max: 10.800000000000011
  episode_reward_mean: 3.771636363636373
  episode_reward_min: -1.5900000000000007
  episodes_this_iter: 110
  episodes_total: 46299
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1946246431424066
          entropy_coeff: 0.01
          kl: 0.013049858573776057
          policy_loss: -0.0685580959973427
          total_loss: 0.09332723398175505
          vf_explained_var: 0.9384799003601074
          vf_loss: 0.154102366632567
    num_agent_steps_sampled: 4258296
    num_agent_steps_trained: 4258296
    num_steps_sampled: 4258296
    num_steps_trained: 42582

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,426,66437.1,4258296,3.77164,10.8,-1.59,91.5364




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4268292
  custom_metrics: {}
  date: 2021-11-08_08-43-33
  done: false
  episode_len_mean: 93.06542056074767
  episode_media: {}
  episode_reward_max: 14.870000000000017
  episode_reward_mean: 4.191495327102813
  episode_reward_min: -1.7900000000000007
  episodes_this_iter: 107
  episodes_total: 46406
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.174702736047598
          entropy_coeff: 0.01
          kl: 0.012752620625056494
          policy_loss: -0.06958157820467893
          total_loss: 0.08406782608845423
          vf_explained_var: 0.9382973313331604
          vf_loss: 0.1463443668702474
    num_agent_steps_sampled: 4268292
    num_agent_steps_trained: 4268292
    num_steps_sampled: 4268292
    num_steps_trained: 4268

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,427,66588,4268292,4.1915,14.87,-1.79,93.0654




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4278288
  custom_metrics: {}
  date: 2021-11-08_08-46-01
  done: false
  episode_len_mean: 94.42056074766356
  episode_media: {}
  episode_reward_max: 12.570000000000023
  episode_reward_mean: 3.5600000000000076
  episode_reward_min: -1.780000000000001
  episodes_this_iter: 107
  episodes_total: 46513
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.211117487381666
          entropy_coeff: 0.01
          kl: 0.013170536175733126
          policy_loss: -0.06289148626323694
          total_loss: 0.0973399846137971
          vf_explained_var: 0.9308083653450012
          vf_loss: 0.15233851672492477
    num_agent_steps_sampled: 4278288
    num_agent_steps_trained: 4278288
    num_steps_sampled: 4278288
    num_steps_trained: 4278

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,428,66736.1,4278288,3.56,12.57,-1.78,94.4206




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4288284
  custom_metrics: {}
  date: 2021-11-08_08-48-32
  done: false
  episode_len_mean: 93.33018867924528
  episode_media: {}
  episode_reward_max: 14.570000000000014
  episode_reward_mean: 3.6414150943396315
  episode_reward_min: -1.520000000000001
  episodes_this_iter: 106
  episodes_total: 46619
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.193110685267
          entropy_coeff: 0.01
          kl: 0.013701907926352957
          policy_loss: -0.06866432583898816
          total_loss: 0.09365027222830133
          vf_explained_var: 0.9367602467536926
          vf_loss: 0.15303104614886717
    num_agent_steps_sampled: 4288284
    num_agent_steps_trained: 4288284
    num_steps_sampled: 4288284
    num_steps_trained: 428828

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,429,66887,4288284,3.64142,14.57,-1.52,93.3302




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4298280
  custom_metrics: {}
  date: 2021-11-08_08-51-02
  done: false
  episode_len_mean: 93.06542056074767
  episode_media: {}
  episode_reward_max: 13.23000000000001
  episode_reward_mean: 3.7883177570093554
  episode_reward_min: -1.4600000000000004
  episodes_this_iter: 107
  episodes_total: 46726
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.187019762524173
          entropy_coeff: 0.01
          kl: 0.013534239433635464
          policy_loss: -0.06821232407202578
          total_loss: 0.0950972365700982
          vf_explained_var: 0.9169899821281433
          vf_loss: 0.154347067940821
    num_agent_steps_sampled: 4298280
    num_agent_steps_trained: 4298280
    num_steps_sampled: 4298280
    num_steps_trained: 429828

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,430,67036.7,4298280,3.78832,13.23,-1.46,93.0654




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4308276
  custom_metrics: {}
  date: 2021-11-08_08-53-35
  done: false
  episode_len_mean: 95.51428571428572
  episode_media: {}
  episode_reward_max: 12.750000000000018
  episode_reward_mean: 4.0635238095238195
  episode_reward_min: -1.590000000000001
  episodes_this_iter: 105
  episodes_total: 46831
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.175864988310724
          entropy_coeff: 0.01
          kl: 0.01391946306409495
          policy_loss: -0.06533067736010521
          total_loss: 0.0901479318865344
          vf_explained_var: 0.9427686333656311
          vf_loss: 0.145526980721734
    num_agent_steps_sampled: 4308276
    num_agent_steps_trained: 4308276
    num_steps_sampled: 4308276
    num_steps_trained: 4308276

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,431,67189.4,4308276,4.06352,12.75,-1.59,95.5143




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4318272
  custom_metrics: {}
  date: 2021-11-08_08-56-06
  done: false
  episode_len_mean: 94.81132075471699
  episode_media: {}
  episode_reward_max: 12.480000000000016
  episode_reward_mean: 4.067358490566049
  episode_reward_min: -1.7000000000000008
  episodes_this_iter: 106
  episodes_total: 46937
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.166742431811797
          entropy_coeff: 0.01
          kl: 0.01387244432253061
          policy_loss: -0.06366099279660445
          total_loss: 0.11707050474838186
          vf_explained_var: 0.9281685948371887
          vf_loss: 0.1707957598595665
    num_agent_steps_sampled: 4318272
    num_agent_steps_trained: 4318272
    num_steps_sampled: 4318272
    num_steps_trained: 43182

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,432,67340.2,4318272,4.06736,12.48,-1.7,94.8113




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4328268
  custom_metrics: {}
  date: 2021-11-08_08-59-08
  done: false
  episode_len_mean: 93.1214953271028
  episode_media: {}
  episode_reward_max: 14.59000000000002
  episode_reward_mean: 3.4900934579439338
  episode_reward_min: -2.130000000000001
  episodes_this_iter: 107
  episodes_total: 47044
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.192212865087721
          entropy_coeff: 0.01
          kl: 0.012734463898041486
          policy_loss: -0.0646487179506793
          total_loss: 0.08126350987909568
          vf_explained_var: 0.9369053244590759
          vf_loss: 0.13882365578260178
    num_agent_steps_sampled: 4328268
    num_agent_steps_trained: 4328268
    num_steps_sampled: 4328268
    num_steps_trained: 432826

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,433,67522.5,4328268,3.49009,14.59,-2.13,93.1215




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4338264
  custom_metrics: {}
  date: 2021-11-08_09-01-45
  done: false
  episode_len_mean: 93.18867924528301
  episode_media: {}
  episode_reward_max: 12.820000000000014
  episode_reward_mean: 3.932641509433972
  episode_reward_min: -1.790000000000001
  episodes_this_iter: 106
  episodes_total: 47150
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1783532872159257
          entropy_coeff: 0.01
          kl: 0.012967397919042454
          policy_loss: -0.06720937876524324
          total_loss: 0.08648040374884239
          vf_explained_var: 0.9349953532218933
          vf_loss: 0.14593196065825784
    num_agent_steps_sampled: 4338264
    num_agent_steps_trained: 4338264
    num_steps_sampled: 4338264
    num_steps_trained: 433

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,434,67679.2,4338264,3.93264,12.82,-1.79,93.1887




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4348260
  custom_metrics: {}
  date: 2021-11-08_09-04-47
  done: false
  episode_len_mean: 92.4
  episode_media: {}
  episode_reward_max: 12.60000000000002
  episode_reward_mean: 4.148545454545463
  episode_reward_min: -1.4900000000000004
  episodes_this_iter: 110
  episodes_total: 47260
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1753786580175416
          entropy_coeff: 0.01
          kl: 0.013980059869402768
          policy_loss: -0.06712413228984572
          total_loss: 0.09887256717197915
          vf_explained_var: 0.9383128881454468
          vf_loss: 0.15590216202231555
    num_agent_steps_sampled: 4348260
    num_agent_steps_trained: 4348260
    num_steps_sampled: 4348260
    num_steps_trained: 4348260
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,435,67861.7,4348260,4.14855,12.6,-1.49,92.4


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4358256
  custom_metrics: {}
  date: 2021-11-08_09-07-06
  done: false
  episode_len_mean: 95.1923076923077
  episode_media: {}
  episode_reward_max: 16.369999999999987
  episode_reward_mean: 4.49576923076924
  episode_reward_min: -1.6900000000000006
  episodes_this_iter: 104
  episodes_total: 47364
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1567796422885013
          entropy_coeff: 0.01
          kl: 0.01296700605030532
          policy_loss: -0.06770500835094952
          total_loss: 0.08779809705944908
          vf_explained_var: 0.9462521076202393
          vf_loss: 0.14753044120903708
    num_agent_steps_sampled: 4358256
    num_agent_steps_trained: 4358256
    num_steps_sampled: 4358256
    num_steps_trained: 43582

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,436,67999.9,4358256,4.49577,16.37,-1.69,95.1923




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4368252
  custom_metrics: {}
  date: 2021-11-08_09-09-32
  done: false
  episode_len_mean: 96.03809523809524
  episode_media: {}
  episode_reward_max: 11.19000000000001
  episode_reward_mean: 4.060476190476201
  episode_reward_min: -2.000000000000001
  episodes_this_iter: 105
  episodes_total: 47469
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.17539571248568
          entropy_coeff: 0.01
          kl: 0.013667290171904245
          policy_loss: -0.06857536891156919
          total_loss: 0.08989419804121822
          vf_explained_var: 0.9333586096763611
          vf_loss: 0.14908772806485748
    num_agent_steps_sampled: 4368252
    num_agent_steps_trained: 4368252
    num_steps_sampled: 4368252
    num_steps_trained: 436825

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,437,68146.4,4368252,4.06048,11.19,-2,96.0381




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4378248
  custom_metrics: {}
  date: 2021-11-08_09-12-20
  done: false
  episode_len_mean: 93.82242990654206
  episode_media: {}
  episode_reward_max: 12.150000000000015
  episode_reward_mean: 3.749813084112158
  episode_reward_min: -1.4400000000000004
  episodes_this_iter: 107
  episodes_total: 47576
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1619676767251432
          entropy_coeff: 0.01
          kl: 0.014028678336347569
          policy_loss: -0.06591576064785576
          total_loss: 0.10617317923018312
          vf_explained_var: 0.9226055145263672
          vf_loss: 0.1617495328761064
    num_agent_steps_sampled: 4378248
    num_agent_steps_trained: 4378248
    num_steps_sampled: 4378248
    num_steps_trained: 437

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,438,68314.3,4378248,3.74981,12.15,-1.44,93.8224




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4388244
  custom_metrics: {}
  date: 2021-11-08_09-14-50
  done: false
  episode_len_mean: 93.77142857142857
  episode_media: {}
  episode_reward_max: 12.450000000000019
  episode_reward_mean: 3.970095238095247
  episode_reward_min: -1.4700000000000006
  episodes_this_iter: 105
  episodes_total: 47681
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.2031607874438293
          entropy_coeff: 0.01
          kl: 0.012837058459041222
          policy_loss: -0.06792928827687716
          total_loss: 0.11450096062647226
          vf_explained_var: 0.9222427606582642
          vf_loss: 0.17521743195720463
    num_agent_steps_sampled: 4388244
    num_agent_steps_trained: 4388244
    num_steps_sampled: 4388244
    num_steps_trained: 43

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,439,68463.9,4388244,3.9701,12.45,-1.47,93.7714




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4398240
  custom_metrics: {}
  date: 2021-11-08_09-17-22
  done: false
  episode_len_mean: 92.77064220183486
  episode_media: {}
  episode_reward_max: 12.680000000000017
  episode_reward_mean: 3.2693577981651463
  episode_reward_min: -1.760000000000001
  episodes_this_iter: 109
  episodes_total: 47790
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.179918657714485
          entropy_coeff: 0.01
          kl: 0.012866413815329943
          policy_loss: -0.06753905803984046
          total_loss: 0.08321171440860718
          vf_explained_var: 0.9243295788764954
          vf_loss: 0.1432386591274323
    num_agent_steps_sampled: 4398240
    num_agent_steps_trained: 4398240
    num_steps_sampled: 4398240
    num_steps_trained: 4398

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,440,68615.9,4398240,3.26936,12.68,-1.76,92.7706




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4408236
  custom_metrics: {}
  date: 2021-11-08_09-20-06
  done: false
  episode_len_mean: 91.71296296296296
  episode_media: {}
  episode_reward_max: 11.01000000000001
  episode_reward_mean: 3.537037037037046
  episode_reward_min: -1.620000000000001
  episodes_this_iter: 108
  episodes_total: 47898
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1878037719645054
          entropy_coeff: 0.01
          kl: 0.013297375787935757
          policy_loss: -0.06639728294446683
          total_loss: 0.10310752287220497
          vf_explained_var: 0.9300084114074707
          vf_loss: 0.16108975840302614
    num_agent_steps_sampled: 4408236
    num_agent_steps_trained: 4408236
    num_steps_sampled: 4408236
    num_steps_trained: 4408

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,441,68780.3,4408236,3.53704,11.01,-1.62,91.713




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4418232
  custom_metrics: {}
  date: 2021-11-08_09-22-38
  done: false
  episode_len_mean: 92.35779816513761
  episode_media: {}
  episode_reward_max: 12.77000000000002
  episode_reward_mean: 3.9921100917431285
  episode_reward_min: -1.2900000000000005
  episodes_this_iter: 109
  episodes_total: 48007
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.164072797441075
          entropy_coeff: 0.01
          kl: 0.013408065369454042
          policy_loss: -0.06746377328044584
          total_loss: 0.09260771785759264
          vf_explained_var: 0.9308831691741943
          vf_loss: 0.1511669698656879
    num_agent_steps_sampled: 4418232
    num_agent_steps_trained: 4418232
    num_steps_sampled: 4418232
    num_steps_trained: 4418

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,442,68932.4,4418232,3.99211,12.77,-1.29,92.3578




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4428228
  custom_metrics: {}
  date: 2021-11-08_09-25-15
  done: false
  episode_len_mean: 91.25688073394495
  episode_media: {}
  episode_reward_max: 12.580000000000021
  episode_reward_mean: 3.555688073394504
  episode_reward_min: -1.4100000000000008
  episodes_this_iter: 109
  episodes_total: 48116
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.177999690047696
          entropy_coeff: 0.01
          kl: 0.013288486839599222
          policy_loss: -0.06265606842457484
          total_loss: 0.10291515037210451
          vf_explained_var: 0.9228453040122986
          vf_loss: 0.1570783812775571
    num_agent_steps_sampled: 4428228
    num_agent_steps_trained: 4428228
    num_steps_sampled: 4428228
    num_steps_trained: 4428

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,443,69089,4428228,3.55569,12.58,-1.41,91.2569


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4438224
  custom_metrics: {}
  date: 2021-11-08_09-27-35
  done: false
  episode_len_mean: 94.97169811320755
  episode_media: {}
  episode_reward_max: 12.740000000000016
  episode_reward_mean: 3.5666981132075564
  episode_reward_min: -1.880000000000001
  episodes_this_iter: 106
  episodes_total: 48222
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.193759940832089
          entropy_coeff: 0.01
          kl: 0.013342025256412742
          policy_loss: -0.06720376779269586
          total_loss: 0.09824209016405491
          vf_explained_var: 0.9239227771759033
          vf_loss: 0.15698865495431116
    num_agent_steps_sampled: 4438224
    num_agent_steps_trained: 4438224
    num_steps_sampled: 4438224
    num_steps_trained: 443

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,444,69228.9,4438224,3.5667,12.74,-1.88,94.9717




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4448220
  custom_metrics: {}
  date: 2021-11-08_09-30-09
  done: false
  episode_len_mean: 90.59633027522936
  episode_media: {}
  episode_reward_max: 12.010000000000018
  episode_reward_mean: 3.7007339449541377
  episode_reward_min: -1.3600000000000003
  episodes_this_iter: 109
  episodes_total: 48331
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.171982989148197
          entropy_coeff: 0.01
          kl: 0.012791097308694967
          policy_loss: -0.0684396517667999
          total_loss: 0.08727104985115365
          vf_explained_var: 0.9311335682868958
          vf_loss: 0.14829081223529372
    num_agent_steps_sampled: 4448220
    num_agent_steps_trained: 4448220
    num_steps_sampled: 4448220
    num_steps_trained: 444

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,445,69382.7,4448220,3.70073,12.01,-1.36,90.5963




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4458216
  custom_metrics: {}
  date: 2021-11-08_09-33-00
  done: false
  episode_len_mean: 92.89814814814815
  episode_media: {}
  episode_reward_max: 10.77000000000002
  episode_reward_mean: 3.4184259259259346
  episode_reward_min: -1.4500000000000008
  episodes_this_iter: 108
  episodes_total: 48439
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.178596302892408
          entropy_coeff: 0.01
          kl: 0.013638855822247541
          policy_loss: -0.06427897508136737
          total_loss: 0.10580647760190261
          vf_explained_var: 0.9163147211074829
          vf_loss: 0.16080039736265556
    num_agent_steps_sampled: 4458216
    num_agent_steps_trained: 4458216
    num_steps_sampled: 4458216
    num_steps_trained: 445

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,446,69553.5,4458216,3.41843,10.77,-1.45,92.8981




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4468212
  custom_metrics: {}
  date: 2021-11-08_09-35-30
  done: false
  episode_len_mean: 94.80188679245283
  episode_media: {}
  episode_reward_max: 14.450000000000019
  episode_reward_mean: 3.753962264150953
  episode_reward_min: -1.639999999999999
  episodes_this_iter: 106
  episodes_total: 48545
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.195798749393887
          entropy_coeff: 0.01
          kl: 0.01272178198987024
          policy_loss: -0.06857214570403672
          total_loss: 0.08040440315898094
          vf_explained_var: 0.9299439787864685
          vf_loss: 0.14195272574981307
    num_agent_steps_sampled: 4468212
    num_agent_steps_trained: 4468212
    num_steps_sampled: 4468212
    num_steps_trained: 44682

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,447,69703.8,4468212,3.75396,14.45,-1.64,94.8019




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4478208
  custom_metrics: {}
  date: 2021-11-08_09-38-17
  done: false
  episode_len_mean: 93.95283018867924
  episode_media: {}
  episode_reward_max: 14.670000000000016
  episode_reward_mean: 3.7257547169811396
  episode_reward_min: -1.5200000000000007
  episodes_this_iter: 106
  episodes_total: 48651
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1919232963496804
          entropy_coeff: 0.01
          kl: 0.01374582481210189
          policy_loss: -0.06469867906700343
          total_loss: 0.1198623973931958
          vf_explained_var: 0.916352391242981
          vf_loss: 0.17516560105877554
    num_agent_steps_sampled: 4478208
    num_agent_steps_trained: 4478208
    num_steps_sampled: 4478208
    num_steps_trained: 4478

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,448,69870.7,4478208,3.72575,14.67,-1.52,93.9528


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4488204
  custom_metrics: {}
  date: 2021-11-08_09-40-36
  done: false
  episode_len_mean: 94.0377358490566
  episode_media: {}
  episode_reward_max: 13.230000000000013
  episode_reward_mean: 3.5731132075471788
  episode_reward_min: -1.6100000000000008
  episodes_this_iter: 106
  episodes_total: 48757
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.156036737637642
          entropy_coeff: 0.01
          kl: 0.01230572815979861
          policy_loss: -0.0665735333489302
          total_loss: 0.09751481816777562
          vf_explained_var: 0.9201086163520813
          vf_loss: 0.1576147316214748
    num_agent_steps_sampled: 4488204
    num_agent_steps_trained: 4488204
    num_steps_sampled: 4488204
    num_steps_trained: 448820

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,449,70009.6,4488204,3.57311,13.23,-1.61,94.0377




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4498200
  custom_metrics: {}
  date: 2021-11-08_09-43-23
  done: false
  episode_len_mean: 91.5
  episode_media: {}
  episode_reward_max: 12.370000000000019
  episode_reward_mean: 3.6761818181818264
  episode_reward_min: -1.4500000000000008
  episodes_this_iter: 110
  episodes_total: 48867
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1816313391057855
          entropy_coeff: 0.01
          kl: 0.013009433533141709
          policy_loss: -0.06878777051933556
          total_loss: 0.07551064561320166
          vf_explained_var: 0.9302858710289001
          vf_loss: 0.1364776137850096
    num_agent_steps_sampled: 4498200
    num_agent_steps_trained: 4498200
    num_steps_sampled: 4498200
    num_steps_trained: 4498200
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,450,70177,4498200,3.67618,12.37,-1.45,91.5




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4508196
  custom_metrics: {}
  date: 2021-11-08_09-46-00
  done: false
  episode_len_mean: 91.64814814814815
  episode_media: {}
  episode_reward_max: 16.18
  episode_reward_mean: 3.6889814814814894
  episode_reward_min: -0.9200000000000003
  episodes_this_iter: 108
  episodes_total: 48975
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1795067383692817
          entropy_coeff: 0.01
          kl: 0.013161393425455291
          policy_loss: -0.0657170192967368
          total_loss: 0.10039707554233634
          vf_explained_var: 0.9258984923362732
          vf_loss: 0.15792586186056973
    num_agent_steps_sampled: 4508196
    num_agent_steps_trained: 4508196
    num_steps_sampled: 4508196
    num_steps_trained: 4508196
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,451,70334.3,4508196,3.68898,16.18,-0.92,91.6481




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4518192
  custom_metrics: {}
  date: 2021-11-08_09-48-33
  done: false
  episode_len_mean: 92.89814814814815
  episode_media: {}
  episode_reward_max: 12.610000000000015
  episode_reward_mean: 4.281111111111121
  episode_reward_min: -2.000000000000001
  episodes_this_iter: 108
  episodes_total: 49083
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.177280055763375
          entropy_coeff: 0.01
          kl: 0.013954038008514627
          policy_loss: -0.06335406661925153
          total_loss: 0.10593405546318008
          vf_explained_var: 0.9407122135162354
          vf_loss: 0.15927187871092405
    num_agent_steps_sampled: 4518192
    num_agent_steps_trained: 4518192
    num_steps_sampled: 4518192
    num_steps_trained: 4518

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,452,70487.2,4518192,4.28111,12.61,-2,92.8981




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4528188
  custom_metrics: {}
  date: 2021-11-08_09-51-41
  done: false
  episode_len_mean: 90.1891891891892
  episode_media: {}
  episode_reward_max: 16.56999999999996
  episode_reward_mean: 3.9883783783783864
  episode_reward_min: -1.6400000000000006
  episodes_this_iter: 111
  episodes_total: 49194
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1568512096364274
          entropy_coeff: 0.01
          kl: 0.013826834398871212
          policy_loss: -0.06245097577189788
          total_loss: 0.1225730956890262
          vf_explained_var: 0.9323893189430237
          vf_loss: 0.17509332531983526
    num_agent_steps_sampled: 4528188
    num_agent_steps_trained: 4528188
    num_steps_sampled: 4528188
    num_steps_trained: 4528

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,453,70674.3,4528188,3.98838,16.57,-1.64,90.1892




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4538184
  custom_metrics: {}
  date: 2021-11-08_09-54-27
  done: false
  episode_len_mean: 93.06481481481481
  episode_media: {}
  episode_reward_max: 14.52000000000002
  episode_reward_mean: 4.058703703703713
  episode_reward_min: -1.0500000000000005
  episodes_this_iter: 108
  episodes_total: 49302
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1676534544708383
          entropy_coeff: 0.01
          kl: 0.013367489535963659
          policy_loss: -0.06925862719838181
          total_loss: 0.10015893798305565
          vf_explained_var: 0.9446707367897034
          vf_loss: 0.16064128613529297
    num_agent_steps_sampled: 4538184
    num_agent_steps_trained: 4538184
    num_steps_sampled: 4538184
    num_steps_trained: 453

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,454,70840.7,4538184,4.0587,14.52,-1.05,93.0648




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4548180
  custom_metrics: {}
  date: 2021-11-08_09-57-01
  done: false
  episode_len_mean: 95.10576923076923
  episode_media: {}
  episode_reward_max: 10.270000000000016
  episode_reward_mean: 3.4782692307692398
  episode_reward_min: -1.6900000000000006
  episodes_this_iter: 104
  episodes_total: 49406
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1642304462245385
          entropy_coeff: 0.01
          kl: 0.013612241328718752
          policy_loss: -0.06779810783063245
          total_loss: 0.10466771228796141
          vf_explained_var: 0.9256669282913208
          vf_loss: 0.16309773615664905
    num_agent_steps_sampled: 4548180
    num_agent_steps_trained: 4548180
    num_steps_sampled: 4548180
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,455,70995.1,4548180,3.47827,10.27,-1.69,95.1058




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4558176
  custom_metrics: {}
  date: 2021-11-08_09-59-53
  done: false
  episode_len_mean: 90.46846846846847
  episode_media: {}
  episode_reward_max: 13.140000000000011
  episode_reward_mean: 3.7809009009009085
  episode_reward_min: -1.4700000000000009
  episodes_this_iter: 111
  episodes_total: 49517
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.160636700014783
          entropy_coeff: 0.01
          kl: 0.013003329377736258
          policy_loss: -0.0630716513364743
          total_loss: 0.11079963290960425
          vf_explained_var: 0.9098851084709167
          vf_loss: 0.16585444221034265
    num_agent_steps_sampled: 4558176
    num_agent_steps_trained: 4558176
    num_steps_sampled: 4558176
    num_steps_trained: 455

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,456,71166.9,4558176,3.7809,13.14,-1.47,90.4685




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4568172
  custom_metrics: {}
  date: 2021-11-08_10-02-39
  done: false
  episode_len_mean: 93.69158878504673
  episode_media: {}
  episode_reward_max: 14.700000000000012
  episode_reward_mean: 3.837570093457953
  episode_reward_min: -1.3600000000000005
  episodes_this_iter: 107
  episodes_total: 49624
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1655809645978814
          entropy_coeff: 0.01
          kl: 0.012969227266634624
          policy_loss: -0.06960869274205632
          total_loss: 0.09564008401531694
          vf_explained_var: 0.9252680540084839
          vf_loss: 0.1573590646044184
    num_agent_steps_sampled: 4568172
    num_agent_steps_trained: 4568172
    num_steps_sampled: 4568172
    num_steps_trained: 456

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,457,71332.3,4568172,3.83757,14.7,-1.36,93.6916




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4578168
  custom_metrics: {}
  date: 2021-11-08_10-05-18
  done: false
  episode_len_mean: 91.5229357798165
  episode_media: {}
  episode_reward_max: 12.31000000000002
  episode_reward_mean: 4.455137614678908
  episode_reward_min: -1.4700000000000009
  episodes_this_iter: 109
  episodes_total: 49733
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.146037627387251
          entropy_coeff: 0.01
          kl: 0.013476896330187187
          policy_loss: -0.06610189945651934
          total_loss: 0.09178093649669845
          vf_explained_var: 0.9404269456863403
          vf_loss: 0.14864115693216395
    num_agent_steps_sampled: 4578168
    num_agent_steps_trained: 4578168
    num_steps_sampled: 4578168
    num_steps_trained: 45781

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,458,71491.5,4578168,4.45514,12.31,-1.47,91.5229




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4588164
  custom_metrics: {}
  date: 2021-11-08_10-07-48
  done: false
  episode_len_mean: 94.37142857142857
  episode_media: {}
  episode_reward_max: 10.920000000000012
  episode_reward_mean: 3.9053333333333433
  episode_reward_min: -1.910000000000001
  episodes_this_iter: 105
  episodes_total: 49838
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.160908337230356
          entropy_coeff: 0.01
          kl: 0.013554106554332906
          policy_loss: -0.06868847772224336
          total_loss: 0.0996706241972617
          vf_explained_var: 0.9295716881752014
          vf_loss: 0.15909023398422023
    num_agent_steps_sampled: 4588164
    num_agent_steps_trained: 4588164
    num_steps_sampled: 4588164
    num_steps_trained: 4588

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,459,71641.9,4588164,3.90533,10.92,-1.91,94.3714




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4598160
  custom_metrics: {}
  date: 2021-11-08_10-10-38
  done: false
  episode_len_mean: 91.37272727272727
  episode_media: {}
  episode_reward_max: 12.710000000000015
  episode_reward_mean: 3.656545454545463
  episode_reward_min: -1.4400000000000006
  episodes_this_iter: 110
  episodes_total: 49948
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.16886640410138
          entropy_coeff: 0.01
          kl: 0.01280255129410899
          policy_loss: -0.06778527485827605
          total_loss: 0.09991732478125864
          vf_explained_var: 0.9254178404808044
          vf_loss: 0.16022545163177399
    num_agent_steps_sampled: 4598160
    num_agent_steps_trained: 4598160
    num_steps_sampled: 4598160
    num_steps_trained: 45981

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,460,71811.2,4598160,3.65655,12.71,-1.44,91.3727




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4608156
  custom_metrics: {}
  date: 2021-11-08_10-13-43
  done: false
  episode_len_mean: 89.45132743362832
  episode_media: {}
  episode_reward_max: 9.43000000000001
  episode_reward_mean: 3.653716814159301
  episode_reward_min: -1.760000000000001
  episodes_this_iter: 113
  episodes_total: 50061
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.140358300698109
          entropy_coeff: 0.01
          kl: 0.013291239887731109
          policy_loss: -0.06492210039072949
          total_loss: 0.10723746806606013
          vf_explained_var: 0.9310703277587891
          vf_loss: 0.16328404530500754
    num_agent_steps_sampled: 4608156
    num_agent_steps_trained: 4608156
    num_steps_sampled: 4608156
    num_steps_trained: 460815

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,461,71996.5,4608156,3.65372,9.43,-1.76,89.4513


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4618152
  custom_metrics: {}
  date: 2021-11-08_10-16-04
  done: false
  episode_len_mean: 92.06542056074767
  episode_media: {}
  episode_reward_max: 12.870000000000015
  episode_reward_mean: 3.859813084112158
  episode_reward_min: -1.4800000000000006
  episodes_this_iter: 107
  episodes_total: 50168
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.164970399885096
          entropy_coeff: 0.01
          kl: 0.013840736585796569
          policy_loss: -0.06550184281495137
          total_loss: 0.11042094755058106
          vf_explained_var: 0.9245805740356445
          vf_loss: 0.16604156686454757
    num_agent_steps_sampled: 4618152
    num_agent_steps_trained: 4618152
    num_steps_sampled: 4618152
    num_steps_trained: 461

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,462,72137.5,4618152,3.85981,12.87,-1.48,92.0654




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4628148
  custom_metrics: {}
  date: 2021-11-08_10-18-41
  done: false
  episode_len_mean: 92.53703703703704
  episode_media: {}
  episode_reward_max: 11.040000000000012
  episode_reward_mean: 3.894629629629639
  episode_reward_min: -1.6300000000000006
  episodes_this_iter: 108
  episodes_total: 50276
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1662290099339607
          entropy_coeff: 0.01
          kl: 0.013350180508105217
          policy_loss: -0.06013648437065446
          total_loss: 0.11678770604169267
          vf_explained_var: 0.9293761849403381
          vf_loss: 0.16817309990779966
    num_agent_steps_sampled: 4628148
    num_agent_steps_trained: 4628148
    num_steps_sampled: 4628148
    num_steps_trained: 46

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,463,72294.6,4628148,3.89463,11.04,-1.63,92.537




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4638144
  custom_metrics: {}
  date: 2021-11-08_10-21-36
  done: false
  episode_len_mean: 90.64864864864865
  episode_media: {}
  episode_reward_max: 12.040000000000024
  episode_reward_mean: 3.874684684684693
  episode_reward_min: -1.760000000000001
  episodes_this_iter: 111
  episodes_total: 50387
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1584084670767827
          entropy_coeff: 0.01
          kl: 0.012262471012156492
          policy_loss: -0.06137523013843685
          total_loss: 0.10451589904717591
          vf_explained_var: 0.9095877408981323
          vf_loss: 0.15953977034769506
    num_agent_steps_sampled: 4638144
    num_agent_steps_trained: 4638144
    num_steps_sampled: 4638144
    num_steps_trained: 463

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,464,72469.6,4638144,3.87468,12.04,-1.76,90.6486




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4648140
  custom_metrics: {}
  date: 2021-11-08_10-24-23
  done: false
  episode_len_mean: 90.8
  episode_media: {}
  episode_reward_max: 14.800000000000017
  episode_reward_mean: 4.096727272727282
  episode_reward_min: -1.4900000000000007
  episodes_this_iter: 110
  episodes_total: 50497
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1338452131320267
          entropy_coeff: 0.01
          kl: 0.011843527935854848
          policy_loss: -0.06517703703039476
          total_loss: 0.0817809243981018
          vf_explained_var: 0.9311550855636597
          vf_loss: 0.1413153757221806
    num_agent_steps_sampled: 4648140
    num_agent_steps_trained: 4648140
    num_steps_sampled: 4648140
    num_steps_trained: 4648140
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,465,72636.3,4648140,4.09673,14.8,-1.49,90.8




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4658136
  custom_metrics: {}
  date: 2021-11-08_10-27-00
  done: false
  episode_len_mean: 89.28571428571429
  episode_media: {}
  episode_reward_max: 13.15000000000001
  episode_reward_mean: 4.0410714285714375
  episode_reward_min: -1.3500000000000005
  episodes_this_iter: 112
  episodes_total: 50609
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.143673924299387
          entropy_coeff: 0.01
          kl: 0.013211516966026765
          policy_loss: -0.06277374599574723
          total_loss: 0.08753540336003161
          vf_explained_var: 0.9341976046562195
          vf_loss: 0.1416484016001734
    num_agent_steps_sampled: 4658136
    num_agent_steps_trained: 4658136
    num_steps_sampled: 4658136
    num_steps_trained: 4658

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,466,72793.6,4658136,4.04107,13.15,-1.35,89.2857




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4668132
  custom_metrics: {}
  date: 2021-11-08_10-29-46
  done: false
  episode_len_mean: 88.84955752212389
  episode_media: {}
  episode_reward_max: 14.810000000000013
  episode_reward_mean: 4.08654867256638
  episode_reward_min: -1.4000000000000006
  episodes_this_iter: 113
  episodes_total: 50722
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1077513232190386
          entropy_coeff: 0.01
          kl: 0.013103184825175397
          policy_loss: -0.06451088701311149
          total_loss: 0.1310163989328803
          vf_explained_var: 0.9260838031768799
          vf_loss: 0.1867541063242616
    num_agent_steps_sampled: 4668132
    num_agent_steps_trained: 4668132
    num_steps_sampled: 4668132
    num_steps_trained: 46681

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,467,72959.6,4668132,4.08655,14.81,-1.4,88.8496




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4678128
  custom_metrics: {}
  date: 2021-11-08_10-32-22
  done: false
  episode_len_mean: 90.6
  episode_media: {}
  episode_reward_max: 12.940000000000015
  episode_reward_mean: 4.0310000000000095
  episode_reward_min: -1.3700000000000006
  episodes_this_iter: 110
  episodes_total: 50832
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1428655000833365
          entropy_coeff: 0.01
          kl: 0.012975541616002102
          policy_loss: -0.06455867163932476
          total_loss: 0.09301891850832945
          vf_explained_var: 0.93223637342453
          vf_loss: 0.1494463397221815
    num_agent_steps_sampled: 4678128
    num_agent_steps_trained: 4678128
    num_steps_sampled: 4678128
    num_steps_trained: 4678128
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,468,73115.6,4678128,4.031,12.94,-1.37,90.6


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4688124
  custom_metrics: {}
  date: 2021-11-08_10-34-43
  done: false
  episode_len_mean: 91.14678899082568
  episode_media: {}
  episode_reward_max: 10.470000000000015
  episode_reward_mean: 3.367339449541292
  episode_reward_min: -1.4500000000000006
  episodes_this_iter: 109
  episodes_total: 50941
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1379743695259092
          entropy_coeff: 0.01
          kl: 0.012795286065115291
          policy_loss: -0.06510438378581888
          total_loss: 0.10391335004510788
          vf_explained_var: 0.9272468090057373
          vf_loss: 0.16124821599397776
    num_agent_steps_sampled: 4688124
    num_agent_steps_trained: 4688124
    num_steps_sampled: 4688124
    num_steps_trained: 46

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,469,73256.1,4688124,3.36734,10.47,-1.45,91.1468




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4698120
  custom_metrics: {}
  date: 2021-11-08_10-37-22
  done: false
  episode_len_mean: 87.3913043478261
  episode_media: {}
  episode_reward_max: 10.830000000000014
  episode_reward_mean: 3.882695652173922
  episode_reward_min: -1.5200000000000005
  episodes_this_iter: 115
  episodes_total: 51056
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.120785379817343
          entropy_coeff: 0.01
          kl: 0.012595667414883086
          policy_loss: -0.06361818275231326
          total_loss: 0.11376962268597678
          vf_explained_var: 0.93763667345047
          vf_loss: 0.16990115309659487
    num_agent_steps_sampled: 4698120
    num_agent_steps_trained: 4698120
    num_steps_sampled: 4698120
    num_steps_trained: 469812

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,470,73415.5,4698120,3.8827,10.83,-1.52,87.3913




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4708116
  custom_metrics: {}
  date: 2021-11-08_10-40-32
  done: false
  episode_len_mean: 86.91228070175438
  episode_media: {}
  episode_reward_max: 16.78
  episode_reward_mean: 4.490438596491238
  episode_reward_min: -1.4200000000000004
  episodes_this_iter: 114
  episodes_total: 51170
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1235311553009555
          entropy_coeff: 0.01
          kl: 0.013517456139090741
          policy_loss: -0.06447497509801998
          total_loss: 0.08833596034882925
          vf_explained_var: 0.9453569650650024
          vf_loss: 0.1432517914977084
    num_agent_steps_sampled: 4708116
    num_agent_steps_trained: 4708116
    num_steps_sampled: 4708116
    num_steps_trained: 4708116
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,471,73605.2,4708116,4.49044,16.78,-1.42,86.9123


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4718112
  custom_metrics: {}
  date: 2021-11-08_10-42-56
  done: false
  episode_len_mean: 89.6283185840708
  episode_media: {}
  episode_reward_max: 14.780000000000017
  episode_reward_mean: 4.155309734513283
  episode_reward_min: -1.2900000000000003
  episodes_this_iter: 113
  episodes_total: 51283
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.133701386105301
          entropy_coeff: 0.01
          kl: 0.013777094876546875
          policy_loss: -0.061777785427581805
          total_loss: 0.12930268774557319
          vf_explained_var: 0.9169219732284546
          vf_loss: 0.18103154168392604
    num_agent_steps_sampled: 4718112
    num_agent_steps_trained: 4718112
    num_steps_sampled: 4718112
    num_steps_trained: 471

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,472,73749.3,4718112,4.15531,14.78,-1.29,89.6283




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4728108
  custom_metrics: {}
  date: 2021-11-08_10-45-34
  done: false
  episode_len_mean: 89.04464285714286
  episode_media: {}
  episode_reward_max: 13.030000000000012
  episode_reward_mean: 4.1781250000000085
  episode_reward_min: -1.5800000000000007
  episodes_this_iter: 112
  episodes_total: 51395
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1305832088503065
          entropy_coeff: 0.01
          kl: 0.013296706005031212
          policy_loss: -0.066083055951943
          total_loss: 0.12839534296375563
          vf_explained_var: 0.9249223470687866
          vf_loss: 0.18549267247510262
    num_agent_steps_sampled: 4728108
    num_agent_steps_trained: 4728108
    num_steps_sampled: 4728108
    num_steps_trained: 472

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,473,73906.9,4728108,4.17813,13.03,-1.58,89.0446




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4738104
  custom_metrics: {}
  date: 2021-11-08_10-48-22
  done: false
  episode_len_mean: 90.68181818181819
  episode_media: {}
  episode_reward_max: 11.090000000000012
  episode_reward_mean: 4.081818181818192
  episode_reward_min: -1.3300000000000005
  episodes_this_iter: 110
  episodes_total: 51505
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.132609485866677
          entropy_coeff: 0.01
          kl: 0.014075651427993744
          policy_loss: -0.06134442587215931
          total_loss: 0.15628946418197365
          vf_explained_var: 0.9149696826934814
          vf_loss: 0.20689389033036099
    num_agent_steps_sampled: 4738104
    num_agent_steps_trained: 4738104
    num_steps_sampled: 4738104
    num_steps_trained: 473

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,474,74074.9,4738104,4.08182,11.09,-1.33,90.6818




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4748100
  custom_metrics: {}
  date: 2021-11-08_10-50-54
  done: false
  episode_len_mean: 90.17117117117117
  episode_media: {}
  episode_reward_max: 13.110000000000014
  episode_reward_mean: 3.9070270270270364
  episode_reward_min: -1.3200000000000005
  episodes_this_iter: 111
  episodes_total: 51616
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1529773223094453
          entropy_coeff: 0.01
          kl: 0.011825928042510314
          policy_loss: -0.06845026857291277
          total_loss: 0.07869347001497562
          vf_explained_var: 0.9398617148399353
          vf_loss: 0.1417325693429408
    num_agent_steps_sampled: 4748100
    num_agent_steps_trained: 4748100
    num_steps_sampled: 4748100
    num_steps_trained: 47

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,475,74227.2,4748100,3.90703,13.11,-1.32,90.1712




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4758096
  custom_metrics: {}
  date: 2021-11-08_10-53-32
  done: false
  episode_len_mean: 88.64035087719299
  episode_media: {}
  episode_reward_max: 10.580000000000013
  episode_reward_mean: 3.7858771929824644
  episode_reward_min: -1.7900000000000007
  episodes_this_iter: 114
  episodes_total: 51730
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1270130341888493
          entropy_coeff: 0.01
          kl: 0.012846018203173538
          policy_loss: -0.06326687233442935
          total_loss: 0.10926288186898853
          vf_explained_var: 0.9253146648406982
          vf_loss: 0.164535049130965
    num_agent_steps_sampled: 4758096
    num_agent_steps_trained: 4758096
    num_steps_sampled: 4758096
    num_steps_trained: 475

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,476,74384.6,4758096,3.78588,10.58,-1.79,88.6404




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4768092
  custom_metrics: {}
  date: 2021-11-08_10-56-21
  done: false
  episode_len_mean: 89.67272727272727
  episode_media: {}
  episode_reward_max: 11.000000000000012
  episode_reward_mean: 4.1999090909091
  episode_reward_min: -1.7000000000000008
  episodes_this_iter: 110
  episodes_total: 51840
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1106501101428625
          entropy_coeff: 0.01
          kl: 0.012841942657539376
          policy_loss: -0.06360638247022772
          total_loss: 0.10444477772196899
          vf_explained_var: 0.9305068254470825
          vf_loss: 0.15990211044271022
    num_agent_steps_sampled: 4768092
    num_agent_steps_trained: 4768092
    num_steps_sampled: 4768092
    num_steps_trained: 4768

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,477,74553.4,4768092,4.19991,11,-1.7,89.6727


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4778088
  custom_metrics: {}
  date: 2021-11-08_10-58-43
  done: false
  episode_len_mean: 89.78571428571429
  episode_media: {}
  episode_reward_max: 12.900000000000013
  episode_reward_mean: 3.9608035714285803
  episode_reward_min: -0.9900000000000003
  episodes_this_iter: 112
  episodes_total: 51952
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.124086292177184
          entropy_coeff: 0.01
          kl: 0.013272330863105957
          policy_loss: -0.06476323477064187
          total_loss: 0.10886911360037504
          vf_explained_var: 0.9256473779678345
          vf_loss: 0.1646371808492093
    num_agent_steps_sampled: 4778088
    num_agent_steps_trained: 4778088
    num_steps_sampled: 4778088
    num_steps_trained: 477

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,478,74696.2,4778088,3.9608,12.9,-0.99,89.7857


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4788084
  custom_metrics: {}
  date: 2021-11-08_11-01-11
  done: false
  episode_len_mean: 89.35135135135135
  episode_media: {}
  episode_reward_max: 14.890000000000011
  episode_reward_mean: 4.010090090090098
  episode_reward_min: -1.3700000000000008
  episodes_this_iter: 111
  episodes_total: 52063
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.130750102365119
          entropy_coeff: 0.01
          kl: 0.012663499350057688
          policy_loss: -0.06289129646606426
          total_loss: 0.11546034041441913
          vf_explained_var: 0.9413130879402161
          vf_loss: 0.17081010264909674
    num_agent_steps_sampled: 4788084
    num_agent_steps_trained: 4788084
    num_steps_sampled: 4788084
    num_steps_trained: 478

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,479,74843.3,4788084,4.01009,14.89,-1.37,89.3514




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4798080
  custom_metrics: {}
  date: 2021-11-08_11-04-01
  done: false
  episode_len_mean: 87.57391304347826
  episode_media: {}
  episode_reward_max: 10.650000000000015
  episode_reward_mean: 3.868086956521748
  episode_reward_min: -1.5300000000000007
  episodes_this_iter: 115
  episodes_total: 52178
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.151420757199964
          entropy_coeff: 0.01
          kl: 0.011874534800408953
          policy_loss: -0.06646596379737314
          total_loss: 0.09116730711255701
          vf_explained_var: 0.9237534999847412
          vf_loss: 0.15209580180195406
    num_agent_steps_sampled: 4798080
    num_agent_steps_trained: 4798080
    num_steps_sampled: 4798080
    num_steps_trained: 479

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,480,75014,4798080,3.86809,10.65,-1.53,87.5739




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4808076
  custom_metrics: {}
  date: 2021-11-08_11-06-43
  done: false
  episode_len_mean: 90.83636363636364
  episode_media: {}
  episode_reward_max: 12.100000000000017
  episode_reward_mean: 3.846545454545464
  episode_reward_min: -2.34
  episodes_this_iter: 110
  episodes_total: 52288
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.125219185331948
          entropy_coeff: 0.01
          kl: 0.013407694139478003
          policy_loss: -0.06299885592988541
          total_loss: 0.11285757099349912
          vf_explained_var: 0.938420832157135
          vf_loss: 0.16656421476608924
    num_agent_steps_sampled: 4808076
    num_agent_steps_trained: 4808076
    num_steps_sampled: 4808076
    num_steps_trained: 4808076
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,481,75175.8,4808076,3.84655,12.1,-2.34,90.8364




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4818072
  custom_metrics: {}
  date: 2021-11-08_11-09-19
  done: false
  episode_len_mean: 90.03603603603604
  episode_media: {}
  episode_reward_max: 14.390000000000017
  episode_reward_mean: 4.345495495495506
  episode_reward_min: -1.4800000000000004
  episodes_this_iter: 111
  episodes_total: 52399
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1300369053824335
          entropy_coeff: 0.01
          kl: 0.013861023372420534
          policy_loss: -0.0637483601943932
          total_loss: 0.12870579321797077
          vf_explained_var: 0.933076024055481
          vf_loss: 0.18217737818789534
    num_agent_steps_sampled: 4818072
    num_agent_steps_trained: 4818072
    num_steps_sampled: 4818072
    num_steps_trained: 4818

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,482,75332,4818072,4.3455,14.39,-1.48,90.036




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4828068
  custom_metrics: {}
  date: 2021-11-08_11-11-58
  done: false
  episode_len_mean: 88.98214285714286
  episode_media: {}
  episode_reward_max: 10.930000000000012
  episode_reward_mean: 3.5931250000000086
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 112
  episodes_total: 52511
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1235514255670402
          entropy_coeff: 0.01
          kl: 0.012664122866219487
          policy_loss: -0.06453496689597765
          total_loss: 0.11062305758778866
          vf_explained_var: 0.9200646281242371
          vf_loss: 0.1675430819646925
    num_agent_steps_sampled: 4828068
    num_agent_steps_trained: 4828068
    num_steps_sampled: 4828068
    num_steps_trained: 48

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,483,75491,4828068,3.59313,10.93,-2.04,88.9821




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4838064
  custom_metrics: {}
  date: 2021-11-08_11-14-38
  done: false
  episode_len_mean: 87.85964912280701
  episode_media: {}
  episode_reward_max: 10.690000000000014
  episode_reward_mean: 3.4891228070175524
  episode_reward_min: -1.3800000000000006
  episodes_this_iter: 114
  episodes_total: 52625
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.106908911313766
          entropy_coeff: 0.01
          kl: 0.013660442315372388
          policy_loss: -0.06414299761223742
          total_loss: 0.12691934530774499
          vf_explained_var: 0.9160206317901611
          vf_loss: 0.18101123401847405
    num_agent_steps_sampled: 4838064
    num_agent_steps_trained: 4838064
    num_steps_sampled: 4838064
    num_steps_trained: 48

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,484,75650.7,4838064,3.48912,10.69,-1.38,87.8596




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4848060
  custom_metrics: {}
  date: 2021-11-08_11-17-19
  done: false
  episode_len_mean: 86.89565217391305
  episode_media: {}
  episode_reward_max: 12.390000000000018
  episode_reward_mean: 3.4321739130434863
  episode_reward_min: -1.3000000000000005
  episodes_this_iter: 115
  episodes_total: 52740
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1230008279156483
          entropy_coeff: 0.01
          kl: 0.01346538196732885
          policy_loss: -0.0610503547005037
          total_loss: 0.147138537819155
          vf_explained_var: 0.9079574942588806
          vf_loss: 0.1987430760748366
    num_agent_steps_sampled: 4848060
    num_agent_steps_trained: 4848060
    num_steps_sampled: 4848060
    num_steps_trained: 484806

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,485,75811.1,4848060,3.43217,12.39,-1.3,86.8957




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4858056
  custom_metrics: {}
  date: 2021-11-08_11-20-02
  done: false
  episode_len_mean: 89.21621621621621
  episode_media: {}
  episode_reward_max: 13.130000000000013
  episode_reward_mean: 4.198108108108118
  episode_reward_min: -1.3600000000000008
  episodes_this_iter: 111
  episodes_total: 52851
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1187694212310335
          entropy_coeff: 0.01
          kl: 0.012492636166812395
          policy_loss: -0.06414870481985883
          total_loss: 0.0954791819748397
          vf_explained_var: 0.934741199016571
          vf_loss: 0.15235579192924958
    num_agent_steps_sampled: 4858056
    num_agent_steps_trained: 4858056
    num_steps_sampled: 4858056
    num_steps_trained: 4858

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,486,75974.8,4858056,4.19811,13.13,-1.36,89.2162




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4868052
  custom_metrics: {}
  date: 2021-11-08_11-22-54
  done: false
  episode_len_mean: 86.02564102564102
  episode_media: {}
  episode_reward_max: 13.30000000000001
  episode_reward_mean: 3.860170940170948
  episode_reward_min: -1.410000000000001
  episodes_this_iter: 117
  episodes_total: 52968
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.091794089272491
          entropy_coeff: 0.01
          kl: 0.012829916147487783
          policy_loss: -0.06409896334999392
          total_loss: 0.10354667310563163
          vf_explained_var: 0.9293210506439209
          vf_loss: 0.15933542462368297
    num_agent_steps_sampled: 4868052
    num_agent_steps_trained: 4868052
    num_steps_sampled: 4868052
    num_steps_trained: 48680

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,487,76146.8,4868052,3.86017,13.3,-1.41,86.0256




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4878048
  custom_metrics: {}
  date: 2021-11-08_11-25-35
  done: false
  episode_len_mean: 88.07964601769912
  episode_media: {}
  episode_reward_max: 13.060000000000015
  episode_reward_mean: 3.8053097345132825
  episode_reward_min: -1.2700000000000007
  episodes_this_iter: 113
  episodes_total: 53081
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1438143125966063
          entropy_coeff: 0.01
          kl: 0.013408583796274862
          policy_loss: -0.06647935136197469
          total_loss: 0.11998540547827626
          vf_explained_var: 0.92138671875
          vf_loss: 0.17735646866007237
    num_agent_steps_sampled: 4878048
    num_agent_steps_trained: 4878048
    num_steps_sampled: 4878048
    num_steps_trained: 487804

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,488,76307.5,4878048,3.80531,13.06,-1.27,88.0796




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4888044
  custom_metrics: {}
  date: 2021-11-08_11-28-29
  done: false
  episode_len_mean: 88.95535714285714
  episode_media: {}
  episode_reward_max: 14.740000000000013
  episode_reward_mean: 3.6676785714285804
  episode_reward_min: -1.1900000000000006
  episodes_this_iter: 112
  episodes_total: 53193
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1302254525005306
          entropy_coeff: 0.01
          kl: 0.013411749028409085
          policy_loss: -0.05885007559823302
          total_loss: 0.1271609879998315
          vf_explained_var: 0.918953537940979
          vf_loss: 0.17675967655566513
    num_agent_steps_sampled: 4888044
    num_agent_steps_trained: 4888044
    num_steps_sampled: 4888044
    num_steps_trained: 488

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,489,76481.8,4888044,3.66768,14.74,-1.19,88.9554




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4898040
  custom_metrics: {}
  date: 2021-11-08_11-31-13
  done: false
  episode_len_mean: 89.76106194690266
  episode_media: {}
  episode_reward_max: 12.740000000000014
  episode_reward_mean: 4.1235398230088585
  episode_reward_min: -1.4500000000000004
  episodes_this_iter: 113
  episodes_total: 53306
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.117258612620525
          entropy_coeff: 0.01
          kl: 0.013694049663241994
          policy_loss: -0.06125812490399067
          total_loss: 0.11735161678531231
          vf_explained_var: 0.9298210740089417
          vf_loss: 0.16858557017090228
    num_agent_steps_sampled: 4898040
    num_agent_steps_trained: 4898040
    num_steps_sampled: 4898040
    num_steps_trained: 48

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,490,76645.5,4898040,4.12354,12.74,-1.45,89.7611




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4908036
  custom_metrics: {}
  date: 2021-11-08_11-33-50
  done: false
  episode_len_mean: 88.32743362831859
  episode_media: {}
  episode_reward_max: 12.900000000000016
  episode_reward_mean: 3.832477876106203
  episode_reward_min: -1.7000000000000006
  episodes_this_iter: 113
  episodes_total: 53419
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.131615988821046
          entropy_coeff: 0.01
          kl: 0.013049969689465828
          policy_loss: -0.06276037994549315
          total_loss: 0.11484681850808681
          vf_explained_var: 0.9183028340339661
          vf_loss: 0.16919389516115188
    num_agent_steps_sampled: 4908036
    num_agent_steps_trained: 4908036
    num_steps_sampled: 4908036
    num_steps_trained: 490

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,491,76802.1,4908036,3.83248,12.9,-1.7,88.3274


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4918032
  custom_metrics: {}
  date: 2021-11-08_11-36-12
  done: false
  episode_len_mean: 91.89908256880734
  episode_media: {}
  episode_reward_max: 13.040000000000012
  episode_reward_mean: 3.924311926605514
  episode_reward_min: -1.3000000000000007
  episodes_this_iter: 109
  episodes_total: 53528
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1400276463255925
          entropy_coeff: 0.01
          kl: 0.013264010514972398
          policy_loss: -0.062138325576949066
          total_loss: 0.10452656916891917
          vf_explained_var: 0.923557698726654
          vf_loss: 0.1578480965625017
    num_agent_steps_sampled: 4918032
    num_agent_steps_trained: 4918032
    num_steps_sampled: 4918032
    num_steps_trained: 491

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,492,76943.8,4918032,3.92431,13.04,-1.3,91.8991




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4928028
  custom_metrics: {}
  date: 2021-11-08_11-38-52
  done: false
  episode_len_mean: 89.2072072072072
  episode_media: {}
  episode_reward_max: 10.480000000000016
  episode_reward_mean: 4.027297297297306
  episode_reward_min: -1.3600000000000005
  episodes_this_iter: 111
  episodes_total: 53639
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1427467939181204
          entropy_coeff: 0.01
          kl: 0.013071401390221563
          policy_loss: -0.06593849297740266
          total_loss: 0.11175167959613296
          vf_explained_var: 0.9289017915725708
          vf_loss: 0.1693393528763937
    num_agent_steps_sampled: 4928028
    num_agent_steps_trained: 4928028
    num_steps_sampled: 4928028
    num_steps_trained: 4928

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,493,77103.9,4928028,4.0273,10.48,-1.36,89.2072




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4938024
  custom_metrics: {}
  date: 2021-11-08_11-41-39
  done: false
  episode_len_mean: 92.33333333333333
  episode_media: {}
  episode_reward_max: 14.870000000000017
  episode_reward_mean: 4.277962962962973
  episode_reward_min: -1.4300000000000006
  episodes_this_iter: 108
  episodes_total: 53747
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.106743803492978
          entropy_coeff: 0.01
          kl: 0.013838377746964507
          policy_loss: -0.06601509942840307
          total_loss: 0.12274564688340721
          vf_explained_var: 0.9306842088699341
          vf_loss: 0.1783026307184472
    num_agent_steps_sampled: 4938024
    num_agent_steps_trained: 4938024
    num_steps_sampled: 4938024
    num_steps_trained: 4938

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,494,77271.5,4938024,4.27796,14.87,-1.43,92.3333




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4948020
  custom_metrics: {}
  date: 2021-11-08_11-44-29
  done: false
  episode_len_mean: 89.4424778761062
  episode_media: {}
  episode_reward_max: 16.30000000000001
  episode_reward_mean: 4.388318584070806
  episode_reward_min: 0.059999999999999255
  episodes_this_iter: 113
  episodes_total: 53860
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.122417413067614
          entropy_coeff: 0.01
          kl: 0.014669350366387253
          policy_loss: -0.06404477788819972
          total_loss: 0.15830223190431028
          vf_explained_var: 0.9227873682975769
          vf_loss: 0.21015256817938172
    num_agent_steps_sampled: 4948020
    num_agent_steps_trained: 4948020
    num_steps_sampled: 4948020
    num_steps_trained: 4948

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,495,77441.2,4948020,4.38832,16.3,0.06,89.4425




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4958016
  custom_metrics: {}
  date: 2021-11-08_11-47-26
  done: false
  episode_len_mean: 92.35514018691589
  episode_media: {}
  episode_reward_max: 15.050000000000015
  episode_reward_mean: 4.208317757009355
  episode_reward_min: -1.8500000000000008
  episodes_this_iter: 107
  episodes_total: 53967
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1247477918608575
          entropy_coeff: 0.01
          kl: 0.014121859797859865
          policy_loss: -0.062545161964929
          total_loss: 0.14673957535280632
          vf_explained_var: 0.8975574374198914
          vf_loss: 0.19836085220942132
    num_agent_steps_sampled: 4958016
    num_agent_steps_trained: 4958016
    num_steps_sampled: 4958016
    num_steps_trained: 4958

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,496,77618.3,4958016,4.20832,15.05,-1.85,92.3551




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4968012
  custom_metrics: {}
  date: 2021-11-08_11-50-11
  done: false
  episode_len_mean: 91.76146788990826
  episode_media: {}
  episode_reward_max: 13.010000000000018
  episode_reward_mean: 4.064403669724781
  episode_reward_min: -1.7200000000000009
  episodes_this_iter: 109
  episodes_total: 54076
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1365451499947117
          entropy_coeff: 0.01
          kl: 0.013707092342073707
          policy_loss: -0.061141959782371406
          total_loss: 0.14534116670584832
          vf_explained_var: 0.9208692312240601
          vf_loss: 0.1966221071843408
    num_agent_steps_sampled: 4968012
    num_agent_steps_trained: 4968012
    num_steps_sampled: 4968012
    num_steps_trained: 49

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,497,77782.8,4968012,4.0644,13.01,-1.72,91.7615




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4978008
  custom_metrics: {}
  date: 2021-11-08_11-52-54
  done: false
  episode_len_mean: 91.35454545454546
  episode_media: {}
  episode_reward_max: 13.050000000000013
  episode_reward_mean: 3.8860909090909175
  episode_reward_min: -1.5600000000000007
  episodes_this_iter: 110
  episodes_total: 54186
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.114571952717936
          entropy_coeff: 0.01
          kl: 0.013786097292821126
          policy_loss: -0.06150772598866596
          total_loss: 0.12654122774385743
          vf_explained_var: 0.9160634279251099
          vf_loss: 0.17778821867675734
    num_agent_steps_sampled: 4978008
    num_agent_steps_trained: 4978008
    num_steps_sampled: 4978008
    num_steps_trained: 49

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,498,77945.6,4978008,3.88609,13.05,-1.56,91.3545




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4988004
  custom_metrics: {}
  date: 2021-11-08_11-55-26
  done: false
  episode_len_mean: 92.6574074074074
  episode_media: {}
  episode_reward_max: 16.80000000000001
  episode_reward_mean: 3.9679629629629725
  episode_reward_min: -1.4500000000000006
  episodes_this_iter: 108
  episodes_total: 54294
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1134957034363704
          entropy_coeff: 0.01
          kl: 0.013963421328782224
          policy_loss: -0.06113529131612462
          total_loss: 0.13990472574901378
          vf_explained_var: 0.9231837391853333
          vf_loss: 0.19036455371409144
    num_agent_steps_sampled: 4988004
    num_agent_steps_trained: 4988004
    num_steps_sampled: 4988004
    num_steps_trained: 498

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,499,78097.5,4988004,3.96796,16.8,-1.45,92.6574




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 4998000
  custom_metrics: {}
  date: 2021-11-08_11-58-15
  done: false
  episode_len_mean: 92.6574074074074
  episode_media: {}
  episode_reward_max: 12.640000000000018
  episode_reward_mean: 4.307314814814824
  episode_reward_min: -1.7200000000000009
  episodes_this_iter: 108
  episodes_total: 54402
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1206686849268075
          entropy_coeff: 0.01
          kl: 0.014112650683412218
          policy_loss: -0.06475139334320258
          total_loss: 0.13005886206674017
          vf_explained_var: 0.9158083200454712
          vf_loss: 0.18386655848982753
    num_agent_steps_sampled: 4998000
    num_agent_steps_trained: 4998000
    num_steps_sampled: 4998000
    num_steps_trained: 499

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,500,78267.1,4998000,4.30731,12.64,-1.72,92.6574




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5007996
  custom_metrics: {}
  date: 2021-11-08_12-00-52
  done: false
  episode_len_mean: 94.61904761904762
  episode_media: {}
  episode_reward_max: 12.680000000000017
  episode_reward_mean: 4.100095238095248
  episode_reward_min: -1.4600000000000006
  episodes_this_iter: 105
  episodes_total: 54507
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.122717368806529
          entropy_coeff: 0.01
          kl: 0.013724498659072406
          policy_loss: -0.06724515453012836
          total_loss: 0.11080540200520275
          vf_explained_var: 0.9328297972679138
          vf_loss: 0.16801160533522438
    num_agent_steps_sampled: 5007996
    num_agent_steps_trained: 5007996
    num_steps_sampled: 5007996
    num_steps_trained: 500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,501,78423.5,5007996,4.1001,12.68,-1.46,94.619




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5017992
  custom_metrics: {}
  date: 2021-11-08_12-03-54
  done: false
  episode_len_mean: 94.01886792452831
  episode_media: {}
  episode_reward_max: 14.160000000000021
  episode_reward_mean: 4.235849056603785
  episode_reward_min: -1.9600000000000013
  episodes_this_iter: 106
  episodes_total: 54613
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.148618249506013
          entropy_coeff: 0.01
          kl: 0.013022625041439369
          policy_loss: -0.06556636420771098
          total_loss: 0.09840850991268571
          vf_explained_var: 0.9349530339241028
          vf_loss: 0.15579388786075463
    num_agent_steps_sampled: 5017992
    num_agent_steps_trained: 5017992
    num_steps_sampled: 5017992
    num_steps_trained: 501

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,502,78605.5,5017992,4.23585,14.16,-1.96,94.0189




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5027988
  custom_metrics: {}
  date: 2021-11-08_12-06-50
  done: false
  episode_len_mean: 90.68468468468468
  episode_media: {}
  episode_reward_max: 12.900000000000015
  episode_reward_mean: 4.346486486486496
  episode_reward_min: -1.870000000000001
  episodes_this_iter: 111
  episodes_total: 54724
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.118124048118917
          entropy_coeff: 0.01
          kl: 0.014046631993819681
          policy_loss: -0.06461728049330731
          total_loss: 0.12485459443882235
          vf_explained_var: 0.9342869520187378
          vf_loss: 0.17865313217680678
    num_agent_steps_sampled: 5027988
    num_agent_steps_trained: 5027988
    num_steps_sampled: 5027988
    num_steps_trained: 5027

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,503,78781.5,5027988,4.34649,12.9,-1.87,90.6847




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5037984
  custom_metrics: {}
  date: 2021-11-08_12-09-38
  done: false
  episode_len_mean: 89.8018018018018
  episode_media: {}
  episode_reward_max: 12.930000000000012
  episode_reward_mean: 4.257387387387396
  episode_reward_min: -1.840000000000001
  episodes_this_iter: 111
  episodes_total: 54835
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1068250849715664
          entropy_coeff: 0.01
          kl: 0.012908847530161215
          policy_loss: -0.0639506733076822
          total_loss: 0.11635159236243647
          vf_explained_var: 0.939650297164917
          vf_loss: 0.17196254718602977
    num_agent_steps_sampled: 5037984
    num_agent_steps_trained: 5037984
    num_steps_sampled: 5037984
    num_steps_trained: 503798

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,504,78949.6,5037984,4.25739,12.93,-1.84,89.8018




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5047980
  custom_metrics: {}
  date: 2021-11-08_12-12-23
  done: false
  episode_len_mean: 90.84684684684684
  episode_media: {}
  episode_reward_max: 18.779999999999994
  episode_reward_mean: 4.45000000000001
  episode_reward_min: -1.2900000000000007
  episodes_this_iter: 111
  episodes_total: 54946
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.099194589944986
          entropy_coeff: 0.01
          kl: 0.013495478670084013
          policy_loss: -0.06640118513988634
          total_loss: 0.11546583600246753
          vf_explained_var: 0.9400841593742371
          vf_loss: 0.1721145789210613
    num_agent_steps_sampled: 5047980
    num_agent_steps_trained: 5047980
    num_steps_sampled: 5047980
    num_steps_trained: 50479

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,505,79115.2,5047980,4.45,18.78,-1.29,90.8468




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5057976
  custom_metrics: {}
  date: 2021-11-08_12-15-29
  done: false
  episode_len_mean: 88.4375
  episode_media: {}
  episode_reward_max: 18.529999999999983
  episode_reward_mean: 3.9705357142857225
  episode_reward_min: -1.970000000000001
  episodes_this_iter: 112
  episodes_total: 55058
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.10300282474257
          entropy_coeff: 0.01
          kl: 0.012457364515031016
          policy_loss: -0.06368333799525713
          total_loss: 0.11067916110205726
          vf_explained_var: 0.9340835213661194
          vf_loss: 0.1670130938816911
    num_agent_steps_sampled: 5057976
    num_agent_steps_trained: 5057976
    num_steps_sampled: 5057976
    num_steps_trained: 5057976
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,506,79300.9,5057976,3.97054,18.53,-1.97,88.4375




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5067972
  custom_metrics: {}
  date: 2021-11-08_12-18-12
  done: false
  episode_len_mean: 92.87962962962963
  episode_media: {}
  episode_reward_max: 14.340000000000014
  episode_reward_mean: 4.739907407407419
  episode_reward_min: -1.0200000000000005
  episodes_this_iter: 108
  episodes_total: 55166
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.124860401235075
          entropy_coeff: 0.01
          kl: 0.014105666214900317
          policy_loss: -0.06571241248016939
          total_loss: 0.12215237146338974
          vf_explained_var: 0.9369056224822998
          vf_loss: 0.17697891742946245
    num_agent_steps_sampled: 5067972
    num_agent_steps_trained: 5067972
    num_steps_sampled: 5067972
    num_steps_trained: 506

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,507,79463.4,5067972,4.73991,14.34,-1.02,92.8796




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5077968
  custom_metrics: {}
  date: 2021-11-08_12-20-46
  done: false
  episode_len_mean: 92.27777777777777
  episode_media: {}
  episode_reward_max: 12.610000000000017
  episode_reward_mean: 4.183981481481491
  episode_reward_min: -1.3100000000000005
  episodes_this_iter: 108
  episodes_total: 55274
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.145126925370632
          entropy_coeff: 0.01
          kl: 0.012419163446708318
          policy_loss: -0.06369213766267157
          total_loss: 0.09176171035864032
          vf_explained_var: 0.9333714246749878
          vf_loss: 0.14861270927179318
    num_agent_steps_sampled: 5077968
    num_agent_steps_trained: 5077968
    num_steps_sampled: 5077968
    num_steps_trained: 507

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,508,79617.6,5077968,4.18398,12.61,-1.31,92.2778




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5087964
  custom_metrics: {}
  date: 2021-11-08_12-23-49
  done: false
  episode_len_mean: 89.8125
  episode_media: {}
  episode_reward_max: 13.120000000000013
  episode_reward_mean: 4.085982142857152
  episode_reward_min: -1.3700000000000006
  episodes_this_iter: 112
  episodes_total: 55386
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.141884172777844
          entropy_coeff: 0.01
          kl: 0.013190798204714206
          policy_loss: -0.0647337655727871
          total_loss: 0.11088151699489253
          vf_explained_var: 0.934858500957489
          vf_loss: 0.16698383655017002
    num_agent_steps_sampled: 5087964
    num_agent_steps_trained: 5087964
    num_steps_sampled: 5087964
    num_steps_trained: 5087964
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,509,79800.7,5087964,4.08598,13.12,-1.37,89.8125




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5097960
  custom_metrics: {}
  date: 2021-11-08_12-26-45
  done: false
  episode_len_mean: 91.18348623853211
  episode_media: {}
  episode_reward_max: 12.65000000000002
  episode_reward_mean: 3.446972477064228
  episode_reward_min: -1.6200000000000006
  episodes_this_iter: 109
  episodes_total: 55495
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1479974014127357
          entropy_coeff: 0.01
          kl: 0.013261710037055719
          policy_loss: -0.0657515812649304
          total_loss: 0.10594756599738557
          vf_explained_var: 0.9234360456466675
          vf_loss: 0.16296728585934284
    num_agent_steps_sampled: 5097960
    num_agent_steps_trained: 5097960
    num_steps_sampled: 5097960
    num_steps_trained: 5097

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,510,79976.3,5097960,3.44697,12.65,-1.62,91.1835


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5107956
  custom_metrics: {}
  date: 2021-11-08_12-29-08
  done: false
  episode_len_mean: 96.48076923076923
  episode_media: {}
  episode_reward_max: 14.680000000000016
  episode_reward_mean: 4.69625000000001
  episode_reward_min: -1.4200000000000006
  episodes_this_iter: 104
  episodes_total: 55599
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1309441009138386
          entropy_coeff: 0.01
          kl: 0.01435272251824833
          policy_loss: -0.06415561207880577
          total_loss: 0.13657710559697997
          vf_explained_var: 0.931501030921936
          vf_loss: 0.18934486228495073
    num_agent_steps_sampled: 5107956
    num_agent_steps_trained: 5107956
    num_steps_sampled: 5107956
    num_steps_trained: 51079

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,511,80119.1,5107956,4.69625,14.68,-1.42,96.4808




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5117952
  custom_metrics: {}
  date: 2021-11-08_12-31-46
  done: false
  episode_len_mean: 90.8440366972477
  episode_media: {}
  episode_reward_max: 14.640000000000017
  episode_reward_mean: 3.9059633027523017
  episode_reward_min: -2.020000000000001
  episodes_this_iter: 109
  episodes_total: 55708
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1552447345521717
          entropy_coeff: 0.01
          kl: 0.012849499263374526
          policy_loss: -0.06120422552538733
          total_loss: 0.11273037917020484
          vf_explained_var: 0.9324292540550232
          vf_loss: 0.16621428549003142
    num_agent_steps_sampled: 5117952
    num_agent_steps_trained: 5117952
    num_steps_sampled: 5117952
    num_steps_trained: 511

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,512,80276.9,5117952,3.90596,14.64,-2.02,90.844




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5127948
  custom_metrics: {}
  date: 2021-11-08_12-34-32
  done: false
  episode_len_mean: 91.05405405405405
  episode_media: {}
  episode_reward_max: 12.67000000000002
  episode_reward_mean: 4.420630630630641
  episode_reward_min: -1.2400000000000004
  episodes_this_iter: 111
  episodes_total: 55819
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.116588531803881
          entropy_coeff: 0.01
          kl: 0.014588874855752392
          policy_loss: -0.062299148579183804
          total_loss: 0.14639612471000252
          vf_explained_var: 0.923125147819519
          vf_loss: 0.19662587679126578
    num_agent_steps_sampled: 5127948
    num_agent_steps_trained: 5127948
    num_steps_sampled: 5127948
    num_steps_trained: 5127

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,513,80442.9,5127948,4.42063,12.67,-1.24,91.0541




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5137944
  custom_metrics: {}
  date: 2021-11-08_12-37-11
  done: false
  episode_len_mean: 91.1
  episode_media: {}
  episode_reward_max: 14.600000000000017
  episode_reward_mean: 5.024818181818193
  episode_reward_min: -0.8100000000000005
  episodes_this_iter: 110
  episodes_total: 55929
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1293307315590035
          entropy_coeff: 0.01
          kl: 0.013611154981222095
          policy_loss: -0.063759255144968
          total_loss: 0.12959017267800932
          vf_explained_var: 0.9389562606811523
          vf_loss: 0.18363482080813912
    num_agent_steps_sampled: 5137944
    num_agent_steps_trained: 5137944
    num_steps_sampled: 5137944
    num_steps_trained: 5137944
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,514,80602.8,5137944,5.02482,14.6,-0.81,91.1




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5147940
  custom_metrics: {}
  date: 2021-11-08_12-39-46
  done: false
  episode_len_mean: 90.14545454545454
  episode_media: {}
  episode_reward_max: 15.020000000000012
  episode_reward_mean: 4.607272727272736
  episode_reward_min: -1.6300000000000008
  episodes_this_iter: 110
  episodes_total: 56039
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.107382189412402
          entropy_coeff: 0.01
          kl: 0.013446423356463622
          policy_loss: -0.064046177533893
          total_loss: 0.10879296592763092
          vf_explained_var: 0.9464465975761414
          vf_loss: 0.16328033143097265
    num_agent_steps_sampled: 5147940
    num_agent_steps_trained: 5147940
    num_steps_sampled: 5147940
    num_steps_trained: 51479

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,515,80756.8,5147940,4.60727,15.02,-1.63,90.1455




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5157936
  custom_metrics: {}
  date: 2021-11-08_12-42-21
  done: false
  episode_len_mean: 91.22727272727273
  episode_media: {}
  episode_reward_max: 13.010000000000016
  episode_reward_mean: 4.325272727272737
  episode_reward_min: -1.260000000000001
  episodes_this_iter: 110
  episodes_total: 56149
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.121546081192473
          entropy_coeff: 0.01
          kl: 0.013831795789555129
          policy_loss: -0.06321733113951408
          total_loss: 0.12992740764609004
          vf_explained_var: 0.9294537901878357
          vf_loss: 0.18284963903964585
    num_agent_steps_sampled: 5157936
    num_agent_steps_trained: 5157936
    num_steps_sampled: 5157936
    num_steps_trained: 5157

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,516,80912.3,5157936,4.32527,13.01,-1.26,91.2273




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5167932
  custom_metrics: {}
  date: 2021-11-08_12-45-15
  done: false
  episode_len_mean: 92.5
  episode_media: {}
  episode_reward_max: 16.570000000000007
  episode_reward_mean: 4.7620370370370475
  episode_reward_min: -1.5000000000000009
  episodes_this_iter: 108
  episodes_total: 56257
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1112727558510933
          entropy_coeff: 0.01
          kl: 0.014972380058203734
          policy_loss: -0.06281865862731495
          total_loss: 0.1569626497088844
          vf_explained_var: 0.9303189516067505
          vf_loss: 0.20678508086846425
    num_agent_steps_sampled: 5167932
    num_agent_steps_trained: 5167932
    num_steps_sampled: 5167932
    num_steps_trained: 5167932
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,517,81085.8,5167932,4.76204,16.57,-1.5,92.5


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5177928
  custom_metrics: {}
  date: 2021-11-08_12-47-39
  done: false
  episode_len_mean: 92.55140186915888
  episode_media: {}
  episode_reward_max: 14.700000000000012
  episode_reward_mean: 4.221308411214962
  episode_reward_min: -1.770000000000001
  episodes_this_iter: 107
  episodes_total: 56364
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.123118661203955
          entropy_coeff: 0.01
          kl: 0.013749952311232948
          policy_loss: -0.05825027614736404
          total_loss: 0.15838154334590857
          vf_explained_var: 0.920340895652771
          vf_loss: 0.20653889369315062
    num_agent_steps_sampled: 5177928
    num_agent_steps_trained: 5177928
    num_steps_sampled: 5177928
    num_steps_trained: 51779

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,518,81229.7,5177928,4.22131,14.7,-1.77,92.5514




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5187924
  custom_metrics: {}
  date: 2021-11-08_12-50-22
  done: false
  episode_len_mean: 93.45370370370371
  episode_media: {}
  episode_reward_max: 13.200000000000012
  episode_reward_mean: 4.577685185185196
  episode_reward_min: -1.3400000000000005
  episodes_this_iter: 108
  episodes_total: 56472
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.11795610658124
          entropy_coeff: 0.01
          kl: 0.014236283935989564
          policy_loss: -0.06187961858848476
          total_loss: 0.16955589204867427
          vf_explained_var: 0.9234325885772705
          vf_loss: 0.22018303617389284
    num_agent_steps_sampled: 5187924
    num_agent_steps_trained: 5187924
    num_steps_sampled: 5187924
    num_steps_trained: 5187

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,519,81393.2,5187924,4.57769,13.2,-1.34,93.4537




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5197920
  custom_metrics: {}
  date: 2021-11-08_12-52-56
  done: false
  episode_len_mean: 91.87037037037037
  episode_media: {}
  episode_reward_max: 16.240000000000006
  episode_reward_mean: 4.518148148148158
  episode_reward_min: -1.3100000000000005
  episodes_this_iter: 108
  episodes_total: 56580
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1097878304302182
          entropy_coeff: 0.01
          kl: 0.013617577171121645
          policy_loss: -0.06161391684849166
          total_loss: 0.14340945755760384
          vf_explained_var: 0.927269697189331
          vf_loss: 0.19509871038648052
    num_agent_steps_sampled: 5197920
    num_agent_steps_trained: 5197920
    num_steps_sampled: 5197920
    num_steps_trained: 519

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,520,81546.7,5197920,4.51815,16.24,-1.31,91.8704


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5207916
  custom_metrics: {}
  date: 2021-11-08_12-55-19
  done: false
  episode_len_mean: 92.40366972477064
  episode_media: {}
  episode_reward_max: 14.60000000000002
  episode_reward_mean: 3.763119266055054
  episode_reward_min: -1.3800000000000006
  episodes_this_iter: 109
  episodes_total: 56689
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.115632369579413
          entropy_coeff: 0.01
          kl: 0.013957235410577048
          policy_loss: -0.061212314729793715
          total_loss: 0.1457759821262115
          vf_explained_var: 0.911159098148346
          vf_loss: 0.19634829252066777
    num_agent_steps_sampled: 5207916
    num_agent_steps_trained: 5207916
    num_steps_sampled: 5207916
    num_steps_trained: 52079

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,521,81690.5,5207916,3.76312,14.6,-1.38,92.4037




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5217912
  custom_metrics: {}
  date: 2021-11-08_12-58-05
  done: false
  episode_len_mean: 91.77777777777777
  episode_media: {}
  episode_reward_max: 14.740000000000016
  episode_reward_mean: 3.854907407407415
  episode_reward_min: -1.4600000000000006
  episodes_this_iter: 108
  episodes_total: 56797
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1061897431683336
          entropy_coeff: 0.01
          kl: 0.013120371759991112
          policy_loss: -0.06321739338211015
          total_loss: 0.13204655883212885
          vf_explained_var: 0.9357517957687378
          vf_loss: 0.18643600097101212
    num_agent_steps_sampled: 5217912
    num_agent_steps_trained: 5217912
    num_steps_sampled: 5217912
    num_steps_trained: 52

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,522,81855.7,5217912,3.85491,14.74,-1.46,91.7778




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5227908
  custom_metrics: {}
  date: 2021-11-08_13-00-49
  done: false
  episode_len_mean: 91.6788990825688
  episode_media: {}
  episode_reward_max: 10.84000000000002
  episode_reward_mean: 3.9786238532110185
  episode_reward_min: -1.0200000000000002
  episodes_this_iter: 109
  episodes_total: 56906
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.117904774347941
          entropy_coeff: 0.01
          kl: 0.012920226079659267
          policy_loss: -0.06282846046627587
          total_loss: 0.11593650874132529
          vf_explained_var: 0.9414200186729431
          vf_loss: 0.17051012660098128
    num_agent_steps_sampled: 5227908
    num_agent_steps_trained: 5227908
    num_steps_sampled: 5227908
    num_steps_trained: 5227

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,523,82020.4,5227908,3.97862,10.84,-1.02,91.6789


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5237904
  custom_metrics: {}
  date: 2021-11-08_13-03-13
  done: false
  episode_len_mean: 92.56880733944953
  episode_media: {}
  episode_reward_max: 12.070000000000018
  episode_reward_mean: 4.028165137614689
  episode_reward_min: -1.5800000000000007
  episodes_this_iter: 109
  episodes_total: 57015
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1287213825771953
          entropy_coeff: 0.01
          kl: 0.014042567156743005
          policy_loss: -0.06329085616410797
          total_loss: 0.12138225234822075
          vf_explained_var: 0.928408145904541
          vf_loss: 0.1739695975318169
    num_agent_steps_sampled: 5237904
    num_agent_steps_trained: 5237904
    num_steps_sampled: 5237904
    num_steps_trained: 5237

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,524,82163.9,5237904,4.02817,12.07,-1.58,92.5688




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5247900
  custom_metrics: {}
  date: 2021-11-08_13-06-05
  done: false
  episode_len_mean: 92.36697247706422
  episode_media: {}
  episode_reward_max: 12.310000000000018
  episode_reward_mean: 4.243302752293587
  episode_reward_min: -1.3100000000000005
  episodes_this_iter: 109
  episodes_total: 57124
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1224549461633733
          entropy_coeff: 0.01
          kl: 0.0141029891189659
          policy_loss: -0.06211653473731289
          total_loss: 0.1443781211001114
          vf_explained_var: 0.9269353747367859
          vf_loss: 0.19559083195546498
    num_agent_steps_sampled: 5247900
    num_agent_steps_trained: 5247900
    num_steps_sampled: 5247900
    num_steps_trained: 52479

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,525,82335.7,5247900,4.2433,12.31,-1.31,92.367




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5257896
  custom_metrics: {}
  date: 2021-11-08_13-09-15
  done: false
  episode_len_mean: 88.65178571428571
  episode_media: {}
  episode_reward_max: 12.340000000000016
  episode_reward_mean: 4.559196428571438
  episode_reward_min: -0.8100000000000005
  episodes_this_iter: 112
  episodes_total: 57236
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0867025793108165
          entropy_coeff: 0.01
          kl: 0.014236735597800042
          policy_loss: -0.05921387435215661
          total_loss: 0.15112157918863062
          vf_explained_var: 0.9311022758483887
          vf_loss: 0.1987694161856531
    num_agent_steps_sampled: 5257896
    num_agent_steps_trained: 5257896
    num_steps_sampled: 5257896
    num_steps_trained: 525

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,526,82525.5,5257896,4.5592,12.34,-0.81,88.6518




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5267892
  custom_metrics: {}
  date: 2021-11-08_13-11-52
  done: false
  episode_len_mean: 93.58878504672897
  episode_media: {}
  episode_reward_max: 14.650000000000015
  episode_reward_mean: 4.259345794392533
  episode_reward_min: -1.4200000000000008
  episodes_this_iter: 107
  episodes_total: 57343
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.098027848586058
          entropy_coeff: 0.01
          kl: 0.014655866090834574
          policy_loss: -0.0587108884850501
          total_loss: 0.1639556520881179
          vf_explained_var: 0.9283369779586792
          vf_loss: 0.2102589237670868
    num_agent_steps_sampled: 5267892
    num_agent_steps_trained: 5267892
    num_steps_sampled: 5267892
    num_steps_trained: 526789

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,527,82683.2,5267892,4.25935,14.65,-1.42,93.5888




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5277888
  custom_metrics: {}
  date: 2021-11-08_13-14-24
  done: false
  episode_len_mean: 94.12380952380953
  episode_media: {}
  episode_reward_max: 14.540000000000019
  episode_reward_mean: 4.346380952380963
  episode_reward_min: -1.1400000000000003
  episodes_this_iter: 105
  episodes_total: 57448
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1300883459229754
          entropy_coeff: 0.01
          kl: 0.013496976871551884
          policy_loss: -0.06485281226058037
          total_loss: 0.14791298045848425
          vf_explained_var: 0.9276853203773499
          vf_loss: 0.20331887528618686
    num_agent_steps_sampled: 5277888
    num_agent_steps_trained: 5277888
    num_steps_sampled: 5277888
    num_steps_trained: 52

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,528,82834.6,5277888,4.34638,14.54,-1.14,94.1238




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5287884
  custom_metrics: {}
  date: 2021-11-08_13-16-54
  done: false
  episode_len_mean: 93.6355140186916
  episode_media: {}
  episode_reward_max: 12.550000000000017
  episode_reward_mean: 4.1935514018691675
  episode_reward_min: -1.9500000000000008
  episodes_this_iter: 107
  episodes_total: 57555
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.103852041766175
          entropy_coeff: 0.01
          kl: 0.013829866216996329
          policy_loss: -0.062081591106760196
          total_loss: 0.1409207211433249
          vf_explained_var: 0.9276747107505798
          vf_loss: 0.19253466809088857
    num_agent_steps_sampled: 5287884
    num_agent_steps_trained: 5287884
    num_steps_sampled: 5287884
    num_steps_trained: 528

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,529,82984.6,5287884,4.19355,12.55,-1.95,93.6355




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5297880
  custom_metrics: {}
  date: 2021-11-08_13-19-44
  done: false
  episode_len_mean: 91.4770642201835
  episode_media: {}
  episode_reward_max: 14.56000000000002
  episode_reward_mean: 4.007522935779825
  episode_reward_min: -1.4100000000000004
  episodes_this_iter: 109
  episodes_total: 57664
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.111947026721433
          entropy_coeff: 0.01
          kl: 0.013446435920724765
          policy_loss: -0.056443963015181384
          total_loss: 0.17800765629523457
          vf_explained_var: 0.9089188575744629
          vf_loss: 0.22493842656636595
    num_agent_steps_sampled: 5297880
    num_agent_steps_trained: 5297880
    num_steps_sampled: 5297880
    num_steps_trained: 5297

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,530,83154.5,5297880,4.00752,14.56,-1.41,91.4771


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5307876
  custom_metrics: {}
  date: 2021-11-08_13-22-05
  done: false
  episode_len_mean: 93.81308411214954
  episode_media: {}
  episode_reward_max: 14.400000000000016
  episode_reward_mean: 4.543551401869169
  episode_reward_min: -1.0300000000000005
  episodes_this_iter: 107
  episodes_total: 57771
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1086813394839945
          entropy_coeff: 0.01
          kl: 0.013090634785011365
          policy_loss: -0.058955756531885034
          total_loss: 0.1524802708520721
          vf_explained_var: 0.9332290291786194
          vf_loss: 0.2027007366076876
    num_agent_steps_sampled: 5307876
    num_agent_steps_trained: 5307876
    num_steps_sampled: 5307876
    num_steps_trained: 530

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,531,83295.8,5307876,4.54355,14.4,-1.03,93.8131




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5317872
  custom_metrics: {}
  date: 2021-11-08_13-24-53
  done: false
  episode_len_mean: 92.19444444444444
  episode_media: {}
  episode_reward_max: 14.670000000000018
  episode_reward_mean: 4.868518518518528
  episode_reward_min: 0.20999999999999885
  episodes_this_iter: 108
  episodes_total: 57879
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0957597552201688
          entropy_coeff: 0.01
          kl: 0.013235945593683088
          policy_loss: -0.05968620150198794
          total_loss: 0.15888229465573772
          vf_explained_var: 0.9284929037094116
          vf_loss: 0.20937295434757686
    num_agent_steps_sampled: 5317872
    num_agent_steps_trained: 5317872
    num_steps_sampled: 5317872
    num_steps_trained: 53

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,532,83463.3,5317872,4.86852,14.67,0.21,92.1944




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5327868
  custom_metrics: {}
  date: 2021-11-08_13-27-36
  done: false
  episode_len_mean: 92.25688073394495
  episode_media: {}
  episode_reward_max: 16.709999999999994
  episode_reward_mean: 5.014036697247717
  episode_reward_min: -1.3300000000000003
  episodes_this_iter: 109
  episodes_total: 57988
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0825749575582324
          entropy_coeff: 0.01
          kl: 0.013982716474234717
          policy_loss: -0.059735090143851234
          total_loss: 0.15998104583535694
          vf_explained_var: 0.936674177646637
          vf_loss: 0.208687508580649
    num_agent_steps_sampled: 5327868
    num_agent_steps_trained: 5327868
    num_steps_sampled: 5327868
    num_steps_trained: 5327

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,533,83626.3,5327868,5.01404,16.71,-1.33,92.2569




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5337864
  custom_metrics: {}
  date: 2021-11-08_13-30-32
  done: false
  episode_len_mean: 91.12844036697248
  episode_media: {}
  episode_reward_max: 12.960000000000013
  episode_reward_mean: 4.142201834862394
  episode_reward_min: -1.2900000000000007
  episodes_this_iter: 109
  episodes_total: 58097
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0958415668234864
          entropy_coeff: 0.01
          kl: 0.01404722303592444
          policy_loss: -0.06038371780170844
          total_loss: 0.1562538922708641
          vf_explained_var: 0.9165080785751343
          vf_loss: 0.20559469498853145
    num_agent_steps_sampled: 5337864
    num_agent_steps_trained: 5337864
    num_steps_sampled: 5337864
    num_steps_trained: 5337

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,534,83802.7,5337864,4.1422,12.96,-1.29,91.1284




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5347860
  custom_metrics: {}
  date: 2021-11-08_13-33-10
  done: false
  episode_len_mean: 92.12962962962963
  episode_media: {}
  episode_reward_max: 14.69000000000002
  episode_reward_mean: 4.184722222222231
  episode_reward_min: -1.0200000000000005
  episodes_this_iter: 108
  episodes_total: 58205
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.10811417265835
          entropy_coeff: 0.01
          kl: 0.014250946977197967
          policy_loss: -0.05814604886099059
          total_loss: 0.1785970377210432
          vf_explained_var: 0.9220886826515198
          vf_loss: 0.22535879026588976
    num_agent_steps_sampled: 5347860
    num_agent_steps_trained: 5347860
    num_steps_sampled: 5347860
    num_steps_trained: 534786

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,535,83960,5347860,4.18472,14.69,-1.02,92.1296




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5357856
  custom_metrics: {}
  date: 2021-11-08_13-35-59
  done: false
  episode_len_mean: 91.32727272727273
  episode_media: {}
  episode_reward_max: 15.150000000000013
  episode_reward_mean: 4.803727272727284
  episode_reward_min: -1.1199999999999972
  episodes_this_iter: 110
  episodes_total: 58315
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1173803890872205
          entropy_coeff: 0.01
          kl: 0.013800225294744038
          policy_loss: -0.06007792347421249
          total_loss: 0.17518436686398509
          vf_explained_var: 0.9313674569129944
          vf_loss: 0.22499745287574255
    num_agent_steps_sampled: 5357856
    num_agent_steps_trained: 5357856
    num_steps_sampled: 5357856
    num_steps_trained: 53

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,536,84129.5,5357856,4.80373,15.15,-1.12,91.3273




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5367852
  custom_metrics: {}
  date: 2021-11-08_13-38-34
  done: false
  episode_len_mean: 92.5137614678899
  episode_media: {}
  episode_reward_max: 11.230000000000013
  episode_reward_mean: 3.9913761467890003
  episode_reward_min: -1.0300000000000002
  episodes_this_iter: 109
  episodes_total: 58424
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1158148866433364
          entropy_coeff: 0.01
          kl: 0.013696671013788731
          policy_loss: -0.06124350930381025
          total_loss: 0.1605529491805559
          vf_explained_var: 0.9235116839408875
          vf_loss: 0.2117518772936275
    num_agent_steps_sampled: 5367852
    num_agent_steps_trained: 5367852
    num_steps_sampled: 5367852
    num_steps_trained: 5367

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,537,84284.6,5367852,3.99138,11.23,-1.03,92.5138




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5377848
  custom_metrics: {}
  date: 2021-11-08_13-41-27
  done: false
  episode_len_mean: 93.08333333333333
  episode_media: {}
  episode_reward_max: 14.570000000000014
  episode_reward_mean: 4.814351851851863
  episode_reward_min: -1.5100000000000007
  episodes_this_iter: 108
  episodes_total: 58532
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.113657966434446
          entropy_coeff: 0.01
          kl: 0.01365897501662787
          policy_loss: -0.059253093913898
          total_loss: 0.15795752851085532
          vf_explained_var: 0.921197235584259
          vf_loss: 0.20723034830556974
    num_agent_steps_sampled: 5377848
    num_agent_steps_trained: 5377848
    num_steps_sampled: 5377848
    num_steps_trained: 5377848

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,538,84457.2,5377848,4.81435,14.57,-1.51,93.0833




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5387844
  custom_metrics: {}
  date: 2021-11-08_13-44-18
  done: false
  episode_len_mean: 90.57798165137615
  episode_media: {}
  episode_reward_max: 15.010000000000014
  episode_reward_mean: 4.104311926605513
  episode_reward_min: -1.1500000000000006
  episodes_this_iter: 109
  episodes_total: 58641
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.079506234327952
          entropy_coeff: 0.01
          kl: 0.013938062567815214
          policy_loss: -0.0572331278381917
          total_loss: 0.1445187621511137
          vf_explained_var: 0.92279452085495
          vf_loss: 0.19079430221110327
    num_agent_steps_sampled: 5387844
    num_agent_steps_trained: 5387844
    num_steps_sampled: 5387844
    num_steps_trained: 5387844

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,539,84628,5387844,4.10431,15.01,-1.15,90.578




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5397840
  custom_metrics: {}
  date: 2021-11-08_13-47-20
  done: false
  episode_len_mean: 92.19444444444444
  episode_media: {}
  episode_reward_max: 14.580000000000014
  episode_reward_mean: 4.414444444444454
  episode_reward_min: -1.3700000000000006
  episodes_this_iter: 108
  episodes_total: 58749
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1224972596535316
          entropy_coeff: 0.01
          kl: 0.013513202840703039
          policy_loss: -0.05861307194886299
          total_loss: 0.1473868373908803
          vf_explained_var: 0.9261518716812134
          vf_loss: 0.19644011633836816
    num_agent_steps_sampled: 5397840
    num_agent_steps_trained: 5397840
    num_steps_sampled: 5397840
    num_steps_trained: 539

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,540,84810.3,5397840,4.41444,14.58,-1.37,92.1944




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5407836
  custom_metrics: {}
  date: 2021-11-08_13-50-15
  done: false
  episode_len_mean: 91.84545454545454
  episode_media: {}
  episode_reward_max: 12.940000000000017
  episode_reward_mean: 4.598545454545464
  episode_reward_min: -1.4600000000000006
  episodes_this_iter: 110
  episodes_total: 58859
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0993182665262466
          entropy_coeff: 0.01
          kl: 0.013523756899828326
          policy_loss: -0.0604423452518944
          total_loss: 0.16556128500045364
          vf_explained_var: 0.9341967701911926
          vf_loss: 0.21618800431362584
    num_agent_steps_sampled: 5407836
    num_agent_steps_trained: 5407836
    num_steps_sampled: 5407836
    num_steps_trained: 540

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,541,84985.3,5407836,4.59855,12.94,-1.46,91.8455




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5417832
  custom_metrics: {}
  date: 2021-11-08_13-52-51
  done: false
  episode_len_mean: 93.10280373831776
  episode_media: {}
  episode_reward_max: 14.760000000000014
  episode_reward_mean: 4.785046728971972
  episode_reward_min: -1.2900000000000005
  episodes_this_iter: 107
  episodes_total: 58966
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.079795038088774
          entropy_coeff: 0.01
          kl: 0.01601888356858705
          policy_loss: -0.05634640531980584
          total_loss: 0.19129693815007043
          vf_explained_var: 0.9202739000320435
          vf_loss: 0.23194827261961934
    num_agent_steps_sampled: 5417832
    num_agent_steps_trained: 5417832
    num_steps_sampled: 5417832
    num_steps_trained: 5417

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,542,85141.1,5417832,4.78505,14.76,-1.29,93.1028


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5427828
  custom_metrics: {}
  date: 2021-11-08_13-55-13
  done: false
  episode_len_mean: 93.85046728971963
  episode_media: {}
  episode_reward_max: 16.03999999999993
  episode_reward_mean: 4.428878504672906
  episode_reward_min: -1.990000000000001
  episodes_this_iter: 107
  episodes_total: 59073
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1029617609121862
          entropy_coeff: 0.01
          kl: 0.012720898227053907
          policy_loss: -0.06034982153652316
          total_loss: 0.11398557692161228
          vf_explained_var: 0.936488687992096
          vf_loss: 0.16638521949768575
    num_agent_steps_sampled: 5427828
    num_agent_steps_trained: 5427828
    num_steps_sampled: 5427828
    num_steps_trained: 54278

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,543,85283.1,5427828,4.42888,16.04,-1.99,93.8505




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5437824
  custom_metrics: {}
  date: 2021-11-08_13-57-49
  done: false
  episode_len_mean: 92.77570093457943
  episode_media: {}
  episode_reward_max: 10.880000000000008
  episode_reward_mean: 3.7232710280373924
  episode_reward_min: -1.6299999999999983
  episodes_this_iter: 107
  episodes_total: 59180
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.132742713455461
          entropy_coeff: 0.01
          kl: 0.012828393548039047
          policy_loss: -0.06222912662177004
          total_loss: 0.1302838707406424
          vf_explained_var: 0.931811511516571
          vf_loss: 0.18461574034717604
    num_agent_steps_sampled: 5437824
    num_agent_steps_trained: 5437824
    num_steps_sampled: 5437824
    num_steps_trained: 5437

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,544,85439.3,5437824,3.72327,10.88,-1.63,92.7757




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5447820
  custom_metrics: {}
  date: 2021-11-08_14-00-33
  done: false
  episode_len_mean: 93.92592592592592
  episode_media: {}
  episode_reward_max: 14.460000000000019
  episode_reward_mean: 4.43935185185186
  episode_reward_min: -1.0300000000000005
  episodes_this_iter: 108
  episodes_total: 59288
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.117107607666244
          entropy_coeff: 0.01
          kl: 0.014566742965105604
          policy_loss: -0.06164571019924349
          total_loss: 0.16456431604953659
          vf_explained_var: 0.9144766926765442
          vf_loss: 0.21419623875274107
    num_agent_steps_sampled: 5447820
    num_agent_steps_trained: 5447820
    num_steps_sampled: 5447820
    num_steps_trained: 5447

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,545,85603.4,5447820,4.43935,14.46,-1.03,93.9259




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5457816
  custom_metrics: {}
  date: 2021-11-08_14-03-25
  done: false
  episode_len_mean: 94.66346153846153
  episode_media: {}
  episode_reward_max: 16.229999999999976
  episode_reward_mean: 3.985576923076932
  episode_reward_min: -1.4500000000000006
  episodes_this_iter: 104
  episodes_total: 59392
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0898315430706385
          entropy_coeff: 0.01
          kl: 0.014023966748892225
          policy_loss: -0.060405650422868565
          total_loss: 0.14429814017449433
          vf_explained_var: 0.9155336022377014
          vf_loss: 0.19365375478648478
    num_agent_steps_sampled: 5457816
    num_agent_steps_trained: 5457816
    num_steps_sampled: 5457816
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,546,85774.7,5457816,3.98558,16.23,-1.45,94.6635


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5467812
  custom_metrics: {}
  date: 2021-11-08_14-05-45
  done: false
  episode_len_mean: 97.94174757281553
  episode_media: {}
  episode_reward_max: 14.840000000000016
  episode_reward_mean: 3.979805825242729
  episode_reward_min: -1.1800000000000004
  episodes_this_iter: 103
  episodes_total: 59495
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1221640221074094
          entropy_coeff: 0.01
          kl: 0.012168143496712918
          policy_loss: -0.06374057633961495
          total_loss: 0.10979448929031053
          vf_explained_var: 0.9257641434669495
          vf_loss: 0.16703615366425525
    num_agent_steps_sampled: 5467812
    num_agent_steps_trained: 5467812
    num_steps_sampled: 5467812
    num_steps_trained: 54

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,547,85914.6,5467812,3.97981,14.84,-1.18,97.9417




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5477808
  custom_metrics: {}
  date: 2021-11-08_14-08-42
  done: false
  episode_len_mean: 92.57943925233644
  episode_media: {}
  episode_reward_max: 18.52999999999998
  episode_reward_mean: 4.531308411214963
  episode_reward_min: -1.2100000000000006
  episodes_this_iter: 107
  episodes_total: 59602
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0901615819360457
          entropy_coeff: 0.01
          kl: 0.013984358730504107
          policy_loss: -0.05962097406005248
          total_loss: 0.121728762325982
          vf_explained_var: 0.9353776574134827
          vf_loss: 0.1703932338211144
    num_agent_steps_sampled: 5477808
    num_agent_steps_trained: 5477808
    num_steps_sampled: 5477808
    num_steps_trained: 547780

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,548,86092,5477808,4.53131,18.53,-1.21,92.5794




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5487804
  custom_metrics: {}
  date: 2021-11-08_14-11-33
  done: false
  episode_len_mean: 97.13461538461539
  episode_media: {}
  episode_reward_max: 12.370000000000012
  episode_reward_mean: 4.8245192307692415
  episode_reward_min: -1.2500000000000009
  episodes_this_iter: 104
  episodes_total: 59706
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.106488652412708
          entropy_coeff: 0.01
          kl: 0.012955207707914848
          policy_loss: -0.05929898787767459
          total_loss: 0.14620760125705065
          vf_explained_var: 0.9313664436340332
          vf_loss: 0.1970578927005458
    num_agent_steps_sampled: 5487804
    num_agent_steps_trained: 5487804
    num_steps_sampled: 5487804
    num_steps_trained: 548

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,549,86262.7,5487804,4.82452,12.37,-1.25,97.1346


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5497800
  custom_metrics: {}
  date: 2021-11-08_14-13-48
  done: false
  episode_len_mean: 98.41584158415841
  episode_media: {}
  episode_reward_max: 18.609999999999964
  episode_reward_mean: 4.903168316831693
  episode_reward_min: -1.9300000000000008
  episodes_this_iter: 101
  episodes_total: 59807
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.087763271576319
          entropy_coeff: 0.01
          kl: 0.01358697612411217
          policy_loss: -0.06110504951773801
          total_loss: 0.149701973147945
          vf_explained_var: 0.9237630367279053
          vf_loss: 0.20073182529045475
    num_agent_steps_sampled: 5497800
    num_agent_steps_trained: 5497800
    num_steps_sampled: 5497800
    num_steps_trained: 549780

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,550,86397.9,5497800,4.90317,18.61,-1.93,98.4158




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5507796
  custom_metrics: {}
  date: 2021-11-08_14-16-21
  done: false
  episode_len_mean: 95.04761904761905
  episode_media: {}
  episode_reward_max: 10.990000000000013
  episode_reward_mean: 3.938285714285724
  episode_reward_min: -2.3
  episodes_this_iter: 105
  episodes_total: 59912
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.113592221084823
          entropy_coeff: 0.01
          kl: 0.012761217669983239
          policy_loss: -0.0647590008413053
          total_loss: 0.1145809355008806
          vf_explained_var: 0.9276897311210632
          vf_loss: 0.1714042083447815
    num_agent_steps_sampled: 5507796
    num_agent_steps_trained: 5507796
    num_steps_sampled: 5507796
    num_steps_trained: 5507796
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,551,86550.8,5507796,3.93829,10.99,-2.3,95.0476




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5517792
  custom_metrics: {}
  date: 2021-11-08_14-19-04
  done: false
  episode_len_mean: 95.6
  episode_media: {}
  episode_reward_max: 18.559999999999985
  episode_reward_mean: 3.7421904761904843
  episode_reward_min: -1.6200000000000006
  episodes_this_iter: 105
  episodes_total: 60017
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1001140689238524
          entropy_coeff: 0.01
          kl: 0.014142219486582723
          policy_loss: -0.05797019996481319
          total_loss: 0.1410063862378717
          vf_explained_var: 0.9060169458389282
          vf_loss: 0.18775998218796955
    num_agent_steps_sampled: 5517792
    num_agent_steps_trained: 5517792
    num_steps_sampled: 5517792
    num_steps_trained: 5517792
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,552,86713.9,5517792,3.74219,18.56,-1.62,95.6




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5527788
  custom_metrics: {}
  date: 2021-11-08_14-21-34
  done: false
  episode_len_mean: 94.82857142857142
  episode_media: {}
  episode_reward_max: 16.19999999999997
  episode_reward_mean: 3.9903809523809612
  episode_reward_min: -1.8700000000000012
  episodes_this_iter: 105
  episodes_total: 60122
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0886417756732714
          entropy_coeff: 0.01
          kl: 0.014578155521683423
          policy_loss: -0.05931386993640763
          total_loss: 0.1537234797166326
          vf_explained_var: 0.9301291704177856
          vf_loss: 0.20071290642277806
    num_agent_steps_sampled: 5527788
    num_agent_steps_trained: 5527788
    num_steps_sampled: 5527788
    num_steps_trained: 552

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,553,86863.8,5527788,3.99038,16.2,-1.87,94.8286




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5537784
  custom_metrics: {}
  date: 2021-11-08_14-24-03
  done: false
  episode_len_mean: 96.22330097087378
  episode_media: {}
  episode_reward_max: 14.660000000000018
  episode_reward_mean: 4.166310679611661
  episode_reward_min: -1.3400000000000003
  episodes_this_iter: 103
  episodes_total: 60225
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1305883557368546
          entropy_coeff: 0.01
          kl: 0.013348259296253374
          policy_loss: -0.06127652532429013
          total_loss: 0.11463095615498531
          vf_explained_var: 0.9268736243247986
          vf_loss: 0.1668043609788148
    num_agent_steps_sampled: 5537784
    num_agent_steps_trained: 5537784
    num_steps_sampled: 5537784
    num_steps_trained: 553

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,554,87013.1,5537784,4.16631,14.66,-1.34,96.2233


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5547780
  custom_metrics: {}
  date: 2021-11-08_14-26-21
  done: false
  episode_len_mean: 98.0
  episode_media: {}
  episode_reward_max: 18.56999999999997
  episode_reward_mean: 4.212427184466028
  episode_reward_min: -1.2300000000000009
  episodes_this_iter: 103
  episodes_total: 60328
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.101371503385723
          entropy_coeff: 0.01
          kl: 0.014350534173940244
          policy_loss: -0.0578761015413727
          total_loss: 0.1613149062404011
          vf_explained_var: 0.9125538468360901
          vf_loss: 0.20751241046585078
    num_agent_steps_sampled: 5547780
    num_agent_steps_trained: 5547780
    num_steps_sampled: 5547780
    num_steps_trained: 5547780
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,555,87150.5,5547780,4.21243,18.57,-1.23,98




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5557776
  custom_metrics: {}
  date: 2021-11-08_14-28-48
  done: false
  episode_len_mean: 94.43809523809524
  episode_media: {}
  episode_reward_max: 16.619999999999997
  episode_reward_mean: 3.8175238095238173
  episode_reward_min: -0.8400000000000005
  episodes_this_iter: 105
  episodes_total: 60433
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1093335673340365
          entropy_coeff: 0.01
          kl: 0.013666655944587023
          policy_loss: -0.0581693112404428
          total_loss: 0.12408526585970679
          vf_explained_var: 0.9160301089286804
          vf_loss: 0.17221356074397381
    num_agent_steps_sampled: 5557776
    num_agent_steps_trained: 5557776
    num_steps_sampled: 5557776
    num_steps_trained: 55

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,556,87297.9,5557776,3.81752,16.62,-0.84,94.4381




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5567772
  custom_metrics: {}
  date: 2021-11-08_14-31-20
  done: false
  episode_len_mean: 95.31132075471699
  episode_media: {}
  episode_reward_max: 18.049999999999944
  episode_reward_mean: 3.7350000000000088
  episode_reward_min: -0.9500000000000008
  episodes_this_iter: 106
  episodes_total: 60539
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.130917805789882
          entropy_coeff: 0.01
          kl: 0.01363034685125795
          policy_loss: -0.06078351783828858
          total_loss: 0.12173784685790794
          vf_explained_var: 0.9223684072494507
          vf_loss: 0.17277890781784414
    num_agent_steps_sampled: 5567772
    num_agent_steps_trained: 5567772
    num_steps_sampled: 5567772
    num_steps_trained: 556

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,557,87449.3,5567772,3.735,18.05,-0.95,95.3113




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5577768
  custom_metrics: {}
  date: 2021-11-08_14-34-02
  done: false
  episode_len_mean: 93.59433962264151
  episode_media: {}
  episode_reward_max: 18.399999999999956
  episode_reward_mean: 4.58622641509435
  episode_reward_min: -0.7900000000000005
  episodes_this_iter: 106
  episodes_total: 60645
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.102572497037741
          entropy_coeff: 0.01
          kl: 0.013546017764906538
          policy_loss: -0.057151614743261
          total_loss: 0.15669177235462345
          vf_explained_var: 0.923291027545929
          vf_loss: 0.20400958995088042
    num_agent_steps_sampled: 5577768
    num_agent_steps_trained: 5577768
    num_steps_sampled: 5577768
    num_steps_trained: 5577768

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,558,87611.3,5577768,4.58623,18.4,-0.79,93.5943


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5587764
  custom_metrics: {}
  date: 2021-11-08_14-36-19
  done: false
  episode_len_mean: 96.01923076923077
  episode_media: {}
  episode_reward_max: 15.930000000000007
  episode_reward_mean: 4.084134615384626
  episode_reward_min: -1.6400000000000008
  episodes_this_iter: 104
  episodes_total: 60749
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1249206466552537
          entropy_coeff: 0.01
          kl: 0.012710931475381862
          policy_loss: -0.0621241164035522
          total_loss: 0.10258069276554972
          vf_explained_var: 0.9347090721130371
          vf_loss: 0.15699692345900923
    num_agent_steps_sampled: 5587764
    num_agent_steps_trained: 5587764
    num_steps_sampled: 5587764
    num_steps_trained: 558

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,559,87748.4,5587764,4.08413,15.93,-1.64,96.0192




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5597760
  custom_metrics: {}
  date: 2021-11-08_14-39-07
  done: false
  episode_len_mean: 94.73584905660377
  episode_media: {}
  episode_reward_max: 13.000000000000016
  episode_reward_mean: 4.40433962264152
  episode_reward_min: 0.3499999999999992
  episodes_this_iter: 106
  episodes_total: 60855
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.111757546204787
          entropy_coeff: 0.01
          kl: 0.013321529596412779
          policy_loss: -0.060682342803248995
          total_loss: 0.12996081647534782
          vf_explained_var: 0.930446445941925
          vf_loss: 0.1814126251822608
    num_agent_steps_sampled: 5597760
    num_agent_steps_trained: 5597760
    num_steps_sampled: 5597760
    num_steps_trained: 559776

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,560,87916.8,5597760,4.40434,13,0.35,94.7358




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5607756
  custom_metrics: {}
  date: 2021-11-08_14-41-42
  done: false
  episode_len_mean: 93.94339622641509
  episode_media: {}
  episode_reward_max: 14.670000000000018
  episode_reward_mean: 4.3020754716981235
  episode_reward_min: -1.840000000000001
  episodes_this_iter: 106
  episodes_total: 60961
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0920642964860314
          entropy_coeff: 0.01
          kl: 0.014161456346660346
          policy_loss: -0.06066488181041856
          total_loss: 0.1189184041057005
          vf_explained_var: 0.9322993755340576
          vf_loss: 0.1682423601914038
    num_agent_steps_sampled: 5607756
    num_agent_steps_trained: 5607756
    num_steps_sampled: 5607756
    num_steps_trained: 5607

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,561,88071.2,5607756,4.30208,14.67,-1.84,93.9434




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5617752
  custom_metrics: {}
  date: 2021-11-08_14-44-14
  done: false
  episode_len_mean: 94.25471698113208
  episode_media: {}
  episode_reward_max: 14.779999999999985
  episode_reward_mean: 4.290188679245293
  episode_reward_min: -1.4300000000000004
  episodes_this_iter: 106
  episodes_total: 61067
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1088318141097697
          entropy_coeff: 0.01
          kl: 0.012972699754291743
          policy_loss: -0.059741780747715224
          total_loss: 0.11331195953166764
          vf_explained_var: 0.9373618364334106
          vf_loss: 0.1645886252610347
    num_agent_steps_sampled: 5617752
    num_agent_steps_trained: 5617752
    num_steps_sampled: 5617752
    num_steps_trained: 56

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,562,88223.1,5617752,4.29019,14.78,-1.43,94.2547




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5627748
  custom_metrics: {}
  date: 2021-11-08_14-46-52
  done: false
  episode_len_mean: 96.37142857142857
  episode_media: {}
  episode_reward_max: 16.709999999999997
  episode_reward_mean: 4.108857142857152
  episode_reward_min: -1.3800000000000008
  episodes_this_iter: 105
  episodes_total: 61172
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.111761159876473
          entropy_coeff: 0.01
          kl: 0.012903218447010914
          policy_loss: -0.0611564925338468
          total_loss: 0.11632023781386769
          vf_explained_var: 0.927923858165741
          vf_loss: 0.1691991965708315
    num_agent_steps_sampled: 5627748
    num_agent_steps_trained: 5627748
    num_steps_sampled: 5627748
    num_steps_trained: 562774

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,563,88381.5,5627748,4.10886,16.71,-1.38,96.3714




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5637744
  custom_metrics: {}
  date: 2021-11-08_14-49-19
  done: false
  episode_len_mean: 97.80392156862744
  episode_media: {}
  episode_reward_max: 14.860000000000017
  episode_reward_mean: 4.350784313725501
  episode_reward_min: -1.4800000000000006
  episodes_this_iter: 102
  episodes_total: 61274
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0831594235876687
          entropy_coeff: 0.01
          kl: 0.01360411753299439
          policy_loss: -0.057978357116763406
          total_loss: 0.1398462994499769
          vf_explained_var: 0.93357914686203
          vf_loss: 0.1876643703565893
    num_agent_steps_sampled: 5637744
    num_agent_steps_trained: 5637744
    num_steps_sampled: 5637744
    num_steps_trained: 563774

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,564,88528.1,5637744,4.35078,14.86,-1.48,97.8039




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5647740
  custom_metrics: {}
  date: 2021-11-08_14-51-46
  done: false
  episode_len_mean: 95.73076923076923
  episode_media: {}
  episode_reward_max: 16.519999999999957
  episode_reward_mean: 3.8318269230769317
  episode_reward_min: -1.4900000000000009
  episodes_this_iter: 104
  episodes_total: 61378
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1114331655013254
          entropy_coeff: 0.01
          kl: 0.014193142736159613
          policy_loss: -0.05732968415205295
          total_loss: 0.13388233005395558
          vf_explained_var: 0.9313586950302124
          vf_loss: 0.17999259233474732
    num_agent_steps_sampled: 5647740
    num_agent_steps_trained: 5647740
    num_steps_sampled: 5647740
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,565,88675.6,5647740,3.83183,16.52,-1.49,95.7308




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5657736
  custom_metrics: {}
  date: 2021-11-08_14-54-33
  done: false
  episode_len_mean: 96.2135922330097
  episode_media: {}
  episode_reward_max: 18.549999999999965
  episode_reward_mean: 4.449029126213602
  episode_reward_min: -1.830000000000001
  episodes_this_iter: 103
  episodes_total: 61481
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.125589787348723
          entropy_coeff: 0.01
          kl: 0.013694745164425795
          policy_loss: -0.05725410152067486
          total_loss: 0.15695247527562145
          vf_explained_var: 0.930344820022583
          vf_loss: 0.20426413255114842
    num_agent_steps_sampled: 5657736
    num_agent_steps_trained: 5657736
    num_steps_sampled: 5657736
    num_steps_trained: 565773

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,566,88842.1,5657736,4.44903,18.55,-1.83,96.2136


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5667732
  custom_metrics: {}
  date: 2021-11-08_14-56-48
  done: false
  episode_len_mean: 98.39603960396039
  episode_media: {}
  episode_reward_max: 18.559999999999985
  episode_reward_mean: 4.673069306930703
  episode_reward_min: -1.0600000000000005
  episodes_this_iter: 101
  episodes_total: 61582
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1246428875841645
          entropy_coeff: 0.01
          kl: 0.013805994990320744
          policy_loss: -0.05637580822261735
          total_loss: 0.16774615691576758
          vf_explained_var: 0.9338754415512085
          vf_loss: 0.21391661034053208
    num_agent_steps_sampled: 5667732
    num_agent_steps_trained: 5667732
    num_steps_sampled: 5667732
    num_steps_trained: 56

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,567,88977.1,5667732,4.67307,18.56,-1.06,98.396




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5677728
  custom_metrics: {}
  date: 2021-11-08_14-59-44
  done: false
  episode_len_mean: 94.51401869158879
  episode_media: {}
  episode_reward_max: 16.630000000000013
  episode_reward_mean: 4.22373831775702
  episode_reward_min: -1.5900000000000005
  episodes_this_iter: 107
  episodes_total: 61689
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.111798931187035
          entropy_coeff: 0.01
          kl: 0.012712736163032305
          policy_loss: -0.05579937226815611
          total_loss: 0.1250577067160326
          vf_explained_var: 0.9399455785751343
          vf_loss: 0.1730138639593099
    num_agent_steps_sampled: 5677728
    num_agent_steps_trained: 5677728
    num_steps_sampled: 5677728
    num_steps_trained: 567772

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,568,89152.7,5677728,4.22374,16.63,-1.59,94.514




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5687724
  custom_metrics: {}
  date: 2021-11-08_15-02-11
  done: false
  episode_len_mean: 95.35576923076923
  episode_media: {}
  episode_reward_max: 12.460000000000015
  episode_reward_mean: 3.738942307692317
  episode_reward_min: -1.6800000000000008
  episodes_this_iter: 104
  episodes_total: 61793
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1293568797600577
          entropy_coeff: 0.01
          kl: 0.01274605658765556
          policy_loss: -0.06260139083442015
          total_loss: 0.11342840661398239
          vf_explained_var: 0.927426815032959
          vf_loss: 0.1682862551500782
    num_agent_steps_sampled: 5687724
    num_agent_steps_trained: 5687724
    num_steps_sampled: 5687724
    num_steps_trained: 56877

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,569,89300.2,5687724,3.73894,12.46,-1.68,95.3558


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5697720
  custom_metrics: {}
  date: 2021-11-08_15-04-27
  done: false
  episode_len_mean: 97.36274509803921
  episode_media: {}
  episode_reward_max: 12.790000000000013
  episode_reward_mean: 3.954313725490205
  episode_reward_min: -0.7900000000000006
  episodes_this_iter: 102
  episodes_total: 61895
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1289142335581985
          entropy_coeff: 0.01
          kl: 0.012876059788955957
          policy_loss: -0.06154343008867696
          total_loss: 0.11185215872147272
          vf_explained_var: 0.9265124201774597
          vf_loss: 0.16535145655815672
    num_agent_steps_sampled: 5697720
    num_agent_steps_trained: 5697720
    num_steps_sampled: 5697720
    num_steps_trained: 56

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,570,89436.2,5697720,3.95431,12.79,-0.79,97.3627




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5707716
  custom_metrics: {}
  date: 2021-11-08_15-07-22
  done: false
  episode_len_mean: 98.95098039215686
  episode_media: {}
  episode_reward_max: 14.99000000000001
  episode_reward_mean: 4.88529411764707
  episode_reward_min: -1.2300000000000006
  episodes_this_iter: 102
  episodes_total: 61997
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.11611845177463
          entropy_coeff: 0.01
          kl: 0.013918796629754336
          policy_loss: -0.05853500746063187
          total_loss: 0.14110093718856317
          vf_explained_var: 0.9326081871986389
          vf_loss: 0.1890883695620757
    num_agent_steps_sampled: 5707716
    num_agent_steps_trained: 5707716
    num_steps_sampled: 5707716
    num_steps_trained: 5707716

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,571,89611.2,5707716,4.88529,14.99,-1.23,98.951




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5717712
  custom_metrics: {}
  date: 2021-11-08_15-10-11
  done: false
  episode_len_mean: 94.48571428571428
  episode_media: {}
  episode_reward_max: 18.349999999999977
  episode_reward_mean: 4.718571428571438
  episode_reward_min: -1.3300000000000005
  episodes_this_iter: 105
  episodes_total: 62102
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0956934713909767
          entropy_coeff: 0.01
          kl: 0.015026483812684038
          policy_loss: -0.05660930984359012
          total_loss: 0.18063569214736294
          vf_explained_var: 0.92402583360672
          vf_loss: 0.22396972761449652
    num_agent_steps_sampled: 5717712
    num_agent_steps_trained: 5717712
    num_steps_sampled: 5717712
    num_steps_trained: 5717

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,572,89779.5,5717712,4.71857,18.35,-1.33,94.4857


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5727708
  custom_metrics: {}
  date: 2021-11-08_15-12-27
  done: false
  episode_len_mean: 96.86538461538461
  episode_media: {}
  episode_reward_max: 14.150000000000018
  episode_reward_mean: 3.9190384615384706
  episode_reward_min: -1.6600000000000008
  episodes_this_iter: 104
  episodes_total: 62206
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1222985537643106
          entropy_coeff: 0.01
          kl: 0.013088027680257621
          policy_loss: -0.05929172728688289
          total_loss: 0.13136825050251225
          vf_explained_var: 0.9302048087120056
          vf_loss: 0.18206679937230724
    num_agent_steps_sampled: 5727708
    num_agent_steps_trained: 5727708
    num_steps_sampled: 5727708
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,573,89916.1,5727708,3.91904,14.15,-1.66,96.8654




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5737704
  custom_metrics: {}
  date: 2021-11-08_15-15-06
  done: false
  episode_len_mean: 95.35576923076923
  episode_media: {}
  episode_reward_max: 16.239999999999952
  episode_reward_mean: 4.041346153846163
  episode_reward_min: -1.660000000000001
  episodes_this_iter: 104
  episodes_total: 62310
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1104150819982217
          entropy_coeff: 0.01
          kl: 0.013011378765768961
          policy_loss: -0.054749308401543614
          total_loss: 0.12710538263886403
          vf_explained_var: 0.9332365393638611
          vf_loss: 0.1733172937320211
    num_agent_steps_sampled: 5737704
    num_agent_steps_trained: 5737704
    num_steps_sampled: 5737704
    num_steps_trained: 573

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,574,90074.9,5737704,4.04135,16.24,-1.66,95.3558




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5747700
  custom_metrics: {}
  date: 2021-11-08_15-17-51
  done: false
  episode_len_mean: 96.01904761904763
  episode_media: {}
  episode_reward_max: 16.640000000000015
  episode_reward_mean: 4.517142857142867
  episode_reward_min: -1.3500000000000003
  episodes_this_iter: 105
  episodes_total: 62415
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1175304061327225
          entropy_coeff: 0.01
          kl: 0.013377492058826487
          policy_loss: -0.05812235514545797
          total_loss: 0.12376510336453843
          vf_explained_var: 0.9337720274925232
          vf_loss: 0.17258716298537885
    num_agent_steps_sampled: 5747700
    num_agent_steps_trained: 5747700
    num_steps_sampled: 5747700
    num_steps_trained: 57

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,575,90239.5,5747700,4.51714,16.64,-1.35,96.019




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5757696
  custom_metrics: {}
  date: 2021-11-08_15-20-27
  done: false
  episode_len_mean: 97.86274509803921
  episode_media: {}
  episode_reward_max: 10.480000000000016
  episode_reward_mean: 4.164803921568638
  episode_reward_min: -1.5800000000000005
  episodes_this_iter: 102
  episodes_total: 62517
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1415650240376465
          entropy_coeff: 0.01
          kl: 0.013022765933451965
          policy_loss: -0.05839841022100459
          total_loss: 0.1108402054104158
          vf_explained_var: 0.9338297247886658
          vf_loss: 0.16098677593991798
    num_agent_steps_sampled: 5757696
    num_agent_steps_trained: 5757696
    num_steps_sampled: 5757696
    num_steps_trained: 575

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,576,90395.5,5757696,4.1648,10.48,-1.58,97.8627




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5767692
  custom_metrics: {}
  date: 2021-11-08_15-22-56
  done: false
  episode_len_mean: 98.26732673267327
  episode_media: {}
  episode_reward_max: 14.180000000000017
  episode_reward_mean: 3.7522772277227836
  episode_reward_min: -1.6600000000000008
  episodes_this_iter: 101
  episodes_total: 62618
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1368523485640174
          entropy_coeff: 0.01
          kl: 0.01223401428030731
          policy_loss: -0.061329151237081006
          total_loss: 0.10362102854280518
          vf_explained_var: 0.932658314704895
          vf_loss: 0.15844808905106833
    num_agent_steps_sampled: 5767692
    num_agent_steps_trained: 5767692
    num_steps_sampled: 5767692
    num_steps_trained: 57

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,577,90544.9,5767692,3.75228,14.18,-1.66,98.2673




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5777688
  custom_metrics: {}
  date: 2021-11-08_15-25-25
  done: false
  episode_len_mean: 97.1826923076923
  episode_media: {}
  episode_reward_max: 18.549999999999994
  episode_reward_mean: 4.41644230769232
  episode_reward_min: -0.6900000000000004
  episodes_this_iter: 104
  episodes_total: 62722
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0897754640660735
          entropy_coeff: 0.01
          kl: 0.014193872556980939
          policy_loss: -0.06148469492188122
          total_loss: 0.1301862336257393
          vf_explained_var: 0.9389698505401611
          vf_loss: 0.18023326695920566
    num_agent_steps_sampled: 5777688
    num_agent_steps_trained: 5777688
    num_steps_sampled: 5777688
    num_steps_trained: 57776

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,578,90694,5777688,4.41644,18.55,-0.69,97.1827




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5787684
  custom_metrics: {}
  date: 2021-11-08_15-28-01
  done: false
  episode_len_mean: 94.80952380952381
  episode_media: {}
  episode_reward_max: 18.389999999999976
  episode_reward_mean: 4.1093333333333435
  episode_reward_min: -1.3600000000000008
  episodes_this_iter: 105
  episodes_total: 62827
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.11247673921096
          entropy_coeff: 0.01
          kl: 0.013258675426111028
          policy_loss: -0.05678113014715859
          total_loss: 0.11954396263911174
          vf_explained_var: 0.9340867400169373
          vf_loss: 0.16724493984196687
    num_agent_steps_sampled: 5787684
    num_agent_steps_trained: 5787684
    num_steps_sampled: 5787684
    num_steps_trained: 578

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,579,90850.2,5787684,4.10933,18.39,-1.36,94.8095




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5797680
  custom_metrics: {}
  date: 2021-11-08_15-30-36
  done: false
  episode_len_mean: 96.23809523809524
  episode_media: {}
  episode_reward_max: 14.810000000000016
  episode_reward_mean: 4.37752380952382
  episode_reward_min: -1.760000000000001
  episodes_this_iter: 105
  episodes_total: 62932
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.139003818564945
          entropy_coeff: 0.01
          kl: 0.012632674746849589
          policy_loss: -0.06047894972989447
          total_loss: 0.10914798924683505
          vf_explained_var: 0.9325283765792847
          vf_loss: 0.16223816505322855
    num_agent_steps_sampled: 5797680
    num_agent_steps_trained: 5797680
    num_steps_sampled: 5797680
    num_steps_trained: 57976

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,580,91004.3,5797680,4.37752,14.81,-1.76,96.2381


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5807676
  custom_metrics: {}
  date: 2021-11-08_15-32-57
  done: false
  episode_len_mean: 94.8173076923077
  episode_media: {}
  episode_reward_max: 12.620000000000017
  episode_reward_mean: 3.7976923076923166
  episode_reward_min: -1.6900000000000008
  episodes_this_iter: 104
  episodes_total: 63036
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1235980173461457
          entropy_coeff: 0.01
          kl: 0.013047743152547732
          policy_loss: -0.0610131273077976
          total_loss: 0.11772249293967317
          vf_explained_var: 0.931091845035553
          vf_loss: 0.17024720936663384
    num_agent_steps_sampled: 5807676
    num_agent_steps_trained: 5807676
    num_steps_sampled: 5807676
    num_steps_trained: 5807

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,581,91145.6,5807676,3.79769,12.62,-1.69,94.8173




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5817672
  custom_metrics: {}
  date: 2021-11-08_15-35-43
  done: false
  episode_len_mean: 93.27522935779817
  episode_media: {}
  episode_reward_max: 14.810000000000018
  episode_reward_mean: 4.417339449541296
  episode_reward_min: -1.0200000000000007
  episodes_this_iter: 109
  episodes_total: 63145
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0888265744233743
          entropy_coeff: 0.01
          kl: 0.012787690342119327
          policy_loss: -0.05942912434076524
          total_loss: 0.0952718137174399
          vf_explained_var: 0.9450990557670593
          vf_loss: 0.14645724596025853
    num_agent_steps_sampled: 5817672
    num_agent_steps_trained: 5817672
    num_steps_sampled: 5817672
    num_steps_trained: 581

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,582,91311.7,5817672,4.41734,14.81,-1.02,93.2752




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5827668
  custom_metrics: {}
  date: 2021-11-08_15-38-40
  done: false
  episode_len_mean: 93.47169811320755
  episode_media: {}
  episode_reward_max: 18.729999999999993
  episode_reward_mean: 4.451415094339632
  episode_reward_min: -1.0900000000000005
  episodes_this_iter: 106
  episodes_total: 63251
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1105083973998697
          entropy_coeff: 0.01
          kl: 0.013114676600634105
          policy_loss: -0.058445780376434074
          total_loss: 0.13139237880818228
          vf_explained_var: 0.9309390783309937
          vf_loss: 0.1810663706328497
    num_agent_steps_sampled: 5827668
    num_agent_steps_trained: 5827668
    num_steps_sampled: 5827668
    num_steps_trained: 58

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,583,91488.9,5827668,4.45142,18.73,-1.09,93.4717




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5837664
  custom_metrics: {}
  date: 2021-11-08_15-41-24
  done: false
  episode_len_mean: 94.29245283018868
  episode_media: {}
  episode_reward_max: 14.450000000000017
  episode_reward_mean: 4.426603773584917
  episode_reward_min: -1.690000000000001
  episodes_this_iter: 106
  episodes_total: 63357
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.135132402334458
          entropy_coeff: 0.01
          kl: 0.012615785338136537
          policy_loss: -0.06299000650835343
          total_loss: 0.11981204319745302
          vf_explained_var: 0.9225195646286011
          vf_loss: 0.17541303541033695
    num_agent_steps_sampled: 5837664
    num_agent_steps_trained: 5837664
    num_steps_sampled: 5837664
    num_steps_trained: 5837

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,584,91652.5,5837664,4.4266,14.45,-1.69,94.2925




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5847660
  custom_metrics: {}
  date: 2021-11-08_15-44-00
  done: false
  episode_len_mean: 93.20560747663552
  episode_media: {}
  episode_reward_max: 18.589999999999943
  episode_reward_mean: 4.168691588785056
  episode_reward_min: -0.8800000000000006
  episodes_this_iter: 107
  episodes_total: 63464
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0994578001845596
          entropy_coeff: 0.01
          kl: 0.013408779114780932
          policy_loss: -0.05935329878623159
          total_loss: 0.12297094432859976
          vf_explained_var: 0.9290771484375
          vf_loss: 0.17277194406582505
    num_agent_steps_sampled: 5847660
    num_agent_steps_trained: 5847660
    num_steps_sampled: 5847660
    num_steps_trained: 58476

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,585,91808.7,5847660,4.16869,18.59,-0.88,93.2056


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5857656
  custom_metrics: {}
  date: 2021-11-08_15-46-21
  done: false
  episode_len_mean: 93.61682242990655
  episode_media: {}
  episode_reward_max: 12.84000000000001
  episode_reward_mean: 3.639065420560756
  episode_reward_min: -1.0700000000000005
  episodes_this_iter: 107
  episodes_total: 63571
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1475603682363134
          entropy_coeff: 0.01
          kl: 0.013065970761701923
          policy_loss: -0.061166023080929735
          total_loss: 0.10554548617945904
          vf_explained_var: 0.9128769636154175
          vf_loss: 0.15842119538758556
    num_agent_steps_sampled: 5857656
    num_agent_steps_trained: 5857656
    num_steps_sampled: 5857656
    num_steps_trained: 58

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,586,91949.3,5857656,3.63907,12.84,-1.07,93.6168




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5867652
  custom_metrics: {}
  date: 2021-11-08_15-48-48
  done: false
  episode_len_mean: 96.0
  episode_media: {}
  episode_reward_max: 16.81
  episode_reward_mean: 4.437115384615395
  episode_reward_min: -1.0200000000000005
  episodes_this_iter: 104
  episodes_total: 63675
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.123581993070423
          entropy_coeff: 0.01
          kl: 0.013928562996960764
          policy_loss: -0.059352648500193894
          total_loss: 0.11803770516004063
          vf_explained_var: 0.9377892017364502
          vf_loss: 0.16689516631806764
    num_agent_steps_sampled: 5867652
    num_agent_steps_trained: 5867652
    num_steps_sampled: 5867652
    num_steps_trained: 5867652
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,587,92096,5867652,4.43712,16.81,-1.02,96




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5877648
  custom_metrics: {}
  date: 2021-11-08_15-51-32
  done: false
  episode_len_mean: 92.25925925925925
  episode_media: {}
  episode_reward_max: 18.43999999999996
  episode_reward_mean: 4.383518518518527
  episode_reward_min: -1.990000000000001
  episodes_this_iter: 108
  episodes_total: 63783
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.124985766105163
          entropy_coeff: 0.01
          kl: 0.013527383604483429
          policy_loss: -0.05806903232557651
          total_loss: 0.11112539591586106
          vf_explained_var: 0.9344698190689087
          vf_loss: 0.1596272146456644
    num_agent_steps_sampled: 5877648
    num_agent_steps_trained: 5877648
    num_steps_sampled: 5877648
    num_steps_trained: 587764

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,588,92260.1,5877648,4.38352,18.44,-1.99,92.2593




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5887644
  custom_metrics: {}
  date: 2021-11-08_15-54-00
  done: false
  episode_len_mean: 94.76415094339623
  episode_media: {}
  episode_reward_max: 16.379999999999995
  episode_reward_mean: 4.299528301886803
  episode_reward_min: -1.3100000000000005
  episodes_this_iter: 106
  episodes_total: 63889
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1708978046718825
          entropy_coeff: 0.01
          kl: 0.01390035472534917
          policy_loss: -0.05904315555961723
          total_loss: 0.1286022069792335
          vf_explained_var: 0.9323331117630005
          vf_loss: 0.17768759459862088
    num_agent_steps_sampled: 5887644
    num_agent_steps_trained: 5887644
    num_steps_sampled: 5887644
    num_steps_trained: 5887

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,589,92408.5,5887644,4.29953,16.38,-1.31,94.7642




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5897640
  custom_metrics: {}
  date: 2021-11-08_15-56-44
  done: false
  episode_len_mean: 94.07619047619048
  episode_media: {}
  episode_reward_max: 12.240000000000018
  episode_reward_mean: 3.932857142857153
  episode_reward_min: -1.5100000000000007
  episodes_this_iter: 105
  episodes_total: 63994
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.161429124408298
          entropy_coeff: 0.01
          kl: 0.013044921797192256
          policy_loss: -0.05960427982270972
          total_loss: 0.09470152233719316
          vf_explained_var: 0.9346832036972046
          vf_loss: 0.1462021298531411
    num_agent_steps_sampled: 5897640
    num_agent_steps_trained: 5897640
    num_steps_sampled: 5897640
    num_steps_trained: 5897

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,590,92572.3,5897640,3.93286,12.24,-1.51,94.0762




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5907636
  custom_metrics: {}
  date: 2021-11-08_15-59-35
  done: false
  episode_len_mean: 94.40186915887851
  episode_media: {}
  episode_reward_max: 16.389999999999954
  episode_reward_mean: 4.645420560747674
  episode_reward_min: -1.3800000000000006
  episodes_this_iter: 107
  episodes_total: 64101
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1440144416613456
          entropy_coeff: 0.01
          kl: 0.013758097355336464
          policy_loss: -0.058257630384630624
          total_loss: 0.1458266498059289
          vf_explained_var: 0.9303377866744995
          vf_loss: 0.1941817581701355
    num_agent_steps_sampled: 5907636
    num_agent_steps_trained: 5907636
    num_steps_sampled: 5907636
    num_steps_trained: 590

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,591,92743.1,5907636,4.64542,16.39,-1.38,94.4019




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5917632
  custom_metrics: {}
  date: 2021-11-08_16-02-10
  done: false
  episode_len_mean: 94.97142857142858
  episode_media: {}
  episode_reward_max: 18.11999999999993
  episode_reward_mean: 4.503047619047628
  episode_reward_min: -1.1900000000000008
  episodes_this_iter: 105
  episodes_total: 64206
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.130168798438504
          entropy_coeff: 0.01
          kl: 0.01339760308818369
          policy_loss: -0.05607923977713809
          total_loss: 0.13200290953167357
          vf_explained_var: 0.9246425628662109
          vf_loss: 0.17886242219207124
    num_agent_steps_sampled: 5917632
    num_agent_steps_trained: 5917632
    num_steps_sampled: 5917632
    num_steps_trained: 59176

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,592,92898.2,5917632,4.50305,18.12,-1.19,94.9714




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5927628
  custom_metrics: {}
  date: 2021-11-08_16-04-45
  done: false
  episode_len_mean: 94.5
  episode_media: {}
  episode_reward_max: 10.940000000000014
  episode_reward_mean: 3.780000000000009
  episode_reward_min: -1.820000000000001
  episodes_this_iter: 106
  episodes_total: 64312
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.150738593655774
          entropy_coeff: 0.01
          kl: 0.013227631653614494
          policy_loss: -0.06098084485747366
          total_loss: 0.11883582885130349
          vf_explained_var: 0.9121790528297424
          vf_loss: 0.17118985978170084
    num_agent_steps_sampled: 5927628
    num_agent_steps_trained: 5927628
    num_steps_sampled: 5927628
    num_steps_trained: 5927628
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,593,93053.3,5927628,3.78,10.94,-1.82,94.5


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5937624
  custom_metrics: {}
  date: 2021-11-08_16-07-01
  done: false
  episode_len_mean: 96.15384615384616
  episode_media: {}
  episode_reward_max: 13.910000000000021
  episode_reward_mean: 4.121346153846165
  episode_reward_min: -1.9600000000000006
  episodes_this_iter: 104
  episodes_total: 64416
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1234185085337387
          entropy_coeff: 0.01
          kl: 0.013588477816308597
          policy_loss: -0.05855492487963703
          total_loss: 0.13206734011889013
          vf_explained_var: 0.9326071739196777
          vf_loss: 0.18090019777226143
    num_agent_steps_sampled: 5937624
    num_agent_steps_trained: 5937624
    num_steps_sampled: 5937624
    num_steps_trained: 59

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,594,93189.5,5937624,4.12135,13.91,-1.96,96.1538




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5947620
  custom_metrics: {}
  date: 2021-11-08_16-09-45
  done: false
  episode_len_mean: 92.42990654205607
  episode_media: {}
  episode_reward_max: 18.680000000000007
  episode_reward_mean: 4.141121495327113
  episode_reward_min: -1.1800000000000006
  episodes_this_iter: 107
  episodes_total: 64523
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1268386359907625
          entropy_coeff: 0.01
          kl: 0.013582260174002402
          policy_loss: -0.058077648553487835
          total_loss: 0.13345923917288455
          vf_explained_var: 0.9291658401489258
          vf_loss: 0.181863186818858
    num_agent_steps_sampled: 5947620
    num_agent_steps_trained: 5947620
    num_steps_sampled: 5947620
    num_steps_trained: 594

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,595,93352.5,5947620,4.14112,18.68,-1.18,92.4299




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5957616
  custom_metrics: {}
  date: 2021-11-08_16-12-28
  done: false
  episode_len_mean: 95.02830188679245
  episode_media: {}
  episode_reward_max: 10.830000000000013
  episode_reward_mean: 3.833962264150953
  episode_reward_min: -1.0900000000000007
  episodes_this_iter: 106
  episodes_total: 64629
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1669265762353556
          entropy_coeff: 0.01
          kl: 0.012891657054149049
          policy_loss: -0.06105582023787702
          total_loss: 0.10757751285186053
          vf_explained_var: 0.9119713306427002
          vf_loss: 0.16093379092426635
    num_agent_steps_sampled: 5957616
    num_agent_steps_trained: 5957616
    num_steps_sampled: 5957616
    num_steps_trained: 59

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,596,93515.7,5957616,3.83396,10.83,-1.09,95.0283




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5967612
  custom_metrics: {}
  date: 2021-11-08_16-15-01
  done: false
  episode_len_mean: 96.0
  episode_media: {}
  episode_reward_max: 16.280000000000005
  episode_reward_mean: 3.7023076923077025
  episode_reward_min: -1.820000000000001
  episodes_this_iter: 104
  episodes_total: 64733
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1401639098795053
          entropy_coeff: 0.01
          kl: 0.013463769548662631
          policy_loss: -0.056806754445832254
          total_loss: 0.12019490659650829
          vf_explained_var: 0.9250617623329163
          vf_loss: 0.1677311497939448
    num_agent_steps_sampled: 5967612
    num_agent_steps_trained: 5967612
    num_steps_sampled: 5967612
    num_steps_trained: 5967612
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,597,93669,5967612,3.70231,16.28,-1.82,96


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5977608
  custom_metrics: {}
  date: 2021-11-08_16-17-18
  done: false
  episode_len_mean: 97.84313725490196
  episode_media: {}
  episode_reward_max: 12.920000000000016
  episode_reward_mean: 4.048921568627462
  episode_reward_min: -1.0000000000000007
  episodes_this_iter: 102
  episodes_total: 64835
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.151366860132951
          entropy_coeff: 0.01
          kl: 0.012954233246761775
          policy_loss: -0.05918484636797355
          total_loss: 0.11483853229631981
          vf_explained_var: 0.9263361096382141
          vf_loss: 0.16602568375344715
    num_agent_steps_sampled: 5977608
    num_agent_steps_trained: 5977608
    num_steps_sampled: 5977608
    num_steps_trained: 597

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,598,93805.4,5977608,4.04892,12.92,-1,97.8431




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5987604
  custom_metrics: {}
  date: 2021-11-08_16-19-47
  done: false
  episode_len_mean: 96.48076923076923
  episode_media: {}
  episode_reward_max: 16.800000000000008
  episode_reward_mean: 4.1527884615384725
  episode_reward_min: -1.5000000000000013
  episodes_this_iter: 104
  episodes_total: 64939
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1101163412770654
          entropy_coeff: 0.01
          kl: 0.013229292464051131
          policy_loss: -0.055623831728903145
          total_loss: 0.13284822104450983
          vf_explained_var: 0.9338278770446777
          vf_loss: 0.17943523382592916
    num_agent_steps_sampled: 5987604
    num_agent_steps_trained: 5987604
    num_steps_sampled: 5987604
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,599,93954.6,5987604,4.15279,16.8,-1.5,96.4808




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 5997600
  custom_metrics: {}
  date: 2021-11-08_16-22-40
  done: false
  episode_len_mean: 92.55045871559633
  episode_media: {}
  episode_reward_max: 14.360000000000019
  episode_reward_mean: 3.7272477064220277
  episode_reward_min: -2.229999999999999
  episodes_this_iter: 109
  episodes_total: 65048
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.117859230795477
          entropy_coeff: 0.01
          kl: 0.012295408742214842
          policy_loss: -0.059410868385313155
          total_loss: 0.1088710914652508
          vf_explained_var: 0.9214643239974976
          vf_loss: 0.16145007294626573
    num_agent_steps_sampled: 5997600
    num_agent_steps_trained: 5997600
    num_steps_sampled: 5997600
    num_steps_trained: 599

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,600,94127.6,5997600,3.72725,14.36,-2.23,92.5505




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6007596
  custom_metrics: {}
  date: 2021-11-08_16-25-18
  done: false
  episode_len_mean: 95.36538461538461
  episode_media: {}
  episode_reward_max: 16.40999999999997
  episode_reward_mean: 4.263653846153856
  episode_reward_min: -1.2400000000000002
  episodes_this_iter: 104
  episodes_total: 65152
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.093578578468062
          entropy_coeff: 0.01
          kl: 0.013538664759100807
          policy_loss: -0.05654829387736117
          total_loss: 0.12993837884571563
          vf_explained_var: 0.925896406173706
          vf_loss: 0.17657968611735056
    num_agent_steps_sampled: 6007596
    num_agent_steps_trained: 6007596
    num_steps_sampled: 6007596
    num_steps_trained: 60075

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,601,94285.4,6007596,4.26365,16.41,-1.24,95.3654




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6017592
  custom_metrics: {}
  date: 2021-11-08_16-27-53
  done: false
  episode_len_mean: 96.625
  episode_media: {}
  episode_reward_max: 13.100000000000014
  episode_reward_mean: 4.438942307692319
  episode_reward_min: -1.8100000000000012
  episodes_this_iter: 104
  episodes_total: 65256
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.134808207882775
          entropy_coeff: 0.01
          kl: 0.013567326058598542
          policy_loss: -0.05793546962305012
          total_loss: 0.14122960823707473
          vf_explained_var: 0.9290462732315063
          vf_loss: 0.18960509311057563
    num_agent_steps_sampled: 6017592
    num_agent_steps_trained: 6017592
    num_steps_sampled: 6017592
    num_steps_trained: 6017592
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,602,94441,6017592,4.43894,13.1,-1.81,96.625




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6027588
  custom_metrics: {}
  date: 2021-11-08_16-30-22
  done: false
  episode_len_mean: 95.26666666666667
  episode_media: {}
  episode_reward_max: 14.320000000000016
  episode_reward_mean: 4.1758095238095345
  episode_reward_min: -1.1400000000000003
  episodes_this_iter: 105
  episodes_total: 65361
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0967081141268085
          entropy_coeff: 0.01
          kl: 0.014061174528990436
          policy_loss: -0.05650557609450104
          total_loss: 0.1549942787290893
          vf_explained_var: 0.9187370538711548
          vf_loss: 0.20043382317337216
    num_agent_steps_sampled: 6027588
    num_agent_steps_trained: 6027588
    num_steps_sampled: 6027588
    num_steps_trained: 60

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,603,94589.6,6027588,4.17581,14.32,-1.14,95.2667




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6037584
  custom_metrics: {}
  date: 2021-11-08_16-32-58
  done: false
  episode_len_mean: 92.3177570093458
  episode_media: {}
  episode_reward_max: 16.36000000000001
  episode_reward_mean: 4.167289719626178
  episode_reward_min: -0.9700000000000001
  episodes_this_iter: 107
  episodes_total: 65468
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1071212492437446
          entropy_coeff: 0.01
          kl: 0.013369670695793692
          policy_loss: -0.05846573930774999
          total_loss: 0.15065426809442603
          vf_explained_var: 0.9226050972938538
          vf_loss: 0.19973343782381625
    num_agent_steps_sampled: 6037584
    num_agent_steps_trained: 6037584
    num_steps_sampled: 6037584
    num_steps_trained: 6037

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,604,94745.7,6037584,4.16729,16.36,-0.97,92.3178




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6047580
  custom_metrics: {}
  date: 2021-11-08_16-35-50
  done: false
  episode_len_mean: 91.21621621621621
  episode_media: {}
  episode_reward_max: 16.639999999999976
  episode_reward_mean: 4.434414414414424
  episode_reward_min: -1.6900000000000006
  episodes_this_iter: 111
  episodes_total: 65579
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0822798090103345
          entropy_coeff: 0.01
          kl: 0.015528589776854167
          policy_loss: -0.051732172368046565
          total_loss: 0.20033554388050978
          vf_explained_var: 0.9221460819244385
          vf_loss: 0.23751444423364268
    num_agent_steps_sampled: 6047580
    num_agent_steps_trained: 6047580
    num_steps_sampled: 6047580
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,605,94917.2,6047580,4.43441,16.64,-1.69,91.2162




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6057576
  custom_metrics: {}
  date: 2021-11-08_16-38-48
  done: false
  episode_len_mean: 94.14150943396227
  episode_media: {}
  episode_reward_max: 22.539999999999978
  episode_reward_mean: 4.543679245283029
  episode_reward_min: -0.8900000000000006
  episodes_this_iter: 106
  episodes_total: 65685
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1024950867025263
          entropy_coeff: 0.01
          kl: 0.012433871149733405
          policy_loss: -0.05750427319485153
          total_loss: 0.13576658898884925
          vf_explained_var: 0.9236668348312378
          vf_loss: 0.18596989940692726
    num_agent_steps_sampled: 6057576
    num_agent_steps_trained: 6057576
    num_steps_sampled: 6057576
    num_steps_trained: 60

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,606,95095.6,6057576,4.54368,22.54,-0.89,94.1415




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6067572
  custom_metrics: {}
  date: 2021-11-08_16-41-54
  done: false
  episode_len_mean: 92.61682242990655
  episode_media: {}
  episode_reward_max: 15.820000000000002
  episode_reward_mean: 4.4189719626168324
  episode_reward_min: -2.0200000000000005
  episodes_this_iter: 107
  episodes_total: 65792
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1156532370127166
          entropy_coeff: 0.01
          kl: 0.013828350586568904
          policy_loss: -0.05493472648354677
          total_loss: 0.14536073698988583
          vf_explained_var: 0.9321929812431335
          vf_loss: 0.18994928425830654
    num_agent_steps_sampled: 6067572
    num_agent_steps_trained: 6067572
    num_steps_sampled: 6067572
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,607,95281,6067572,4.41897,15.82,-2.02,92.6168




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6077568
  custom_metrics: {}
  date: 2021-11-08_16-44-31
  done: false
  episode_len_mean: 92.06481481481481
  episode_media: {}
  episode_reward_max: 18.16999999999995
  episode_reward_mean: 3.904629629629639
  episode_reward_min: -1.7000000000000008
  episodes_this_iter: 108
  episodes_total: 65900
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1009035646405994
          entropy_coeff: 0.01
          kl: 0.013514540853004834
          policy_loss: -0.05127679873417076
          total_loss: 0.1417413914703533
          vf_explained_var: 0.9280036687850952
          vf_loss: 0.18323941202117847
    num_agent_steps_sampled: 6077568
    num_agent_steps_trained: 6077568
    num_steps_sampled: 6077568
    num_steps_trained: 6077

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,608,95438,6077568,3.90463,18.17,-1.7,92.0648




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6087564
  custom_metrics: {}
  date: 2021-11-08_16-47-03
  done: false
  episode_len_mean: 93.61682242990655
  episode_media: {}
  episode_reward_max: 11.850000000000021
  episode_reward_mean: 3.9930841121495426
  episode_reward_min: -1.4900000000000004
  episodes_this_iter: 107
  episodes_total: 66007
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1136604143004134
          entropy_coeff: 0.01
          kl: 0.012981545013456995
          policy_loss: -0.05548564346395751
          total_loss: 0.11695129473128507
          vf_explained_var: 0.9358007311820984
          vf_loss: 0.16399995928837194
    num_agent_steps_sampled: 6087564
    num_agent_steps_trained: 6087564
    num_steps_sampled: 6087564
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,609,95590.6,6087564,3.99308,11.85,-1.49,93.6168




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6097560
  custom_metrics: {}
  date: 2021-11-08_16-49-52
  done: false
  episode_len_mean: 96.4095238095238
  episode_media: {}
  episode_reward_max: 10.910000000000016
  episode_reward_mean: 3.6632380952381047
  episode_reward_min: -1.7300000000000006
  episodes_this_iter: 105
  episodes_total: 66112
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1318097994877743
          entropy_coeff: 0.01
          kl: 0.01303435134609167
          policy_loss: -0.05833752757240819
          total_loss: 0.1239823107002701
          vf_explained_var: 0.9184681177139282
          vf_loss: 0.17394405394856238
    num_agent_steps_sampled: 6097560
    num_agent_steps_trained: 6097560
    num_steps_sampled: 6097560
    num_steps_trained: 6097

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,610,95759.5,6097560,3.66324,10.91,-1.73,96.4095




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6107556
  custom_metrics: {}
  date: 2021-11-08_16-52-55
  done: false
  episode_len_mean: 94.67619047619047
  episode_media: {}
  episode_reward_max: 18.609999999999985
  episode_reward_mean: 4.09742857142858
  episode_reward_min: -1.5900000000000007
  episodes_this_iter: 105
  episodes_total: 66217
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1394429372926043
          entropy_coeff: 0.01
          kl: 0.01354917852273248
          policy_loss: -0.05569731776053325
          total_loss: 0.15396437496344886
          vf_explained_var: 0.9152456521987915
          vf_loss: 0.20018939829598634
    num_agent_steps_sampled: 6107556
    num_agent_steps_trained: 6107556
    num_steps_sampled: 6107556
    num_steps_trained: 6107

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,611,95942.5,6107556,4.09743,18.61,-1.59,94.6762




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6117552
  custom_metrics: {}
  date: 2021-11-08_16-55-49
  done: false
  episode_len_mean: 97.13592233009709
  episode_media: {}
  episode_reward_max: 11.410000000000023
  episode_reward_mean: 3.984271844660205
  episode_reward_min: -1.590000000000001
  episodes_this_iter: 103
  episodes_total: 66320
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.141811663370866
          entropy_coeff: 0.01
          kl: 0.011737417124215005
          policy_loss: -0.058952699986915304
          total_loss: 0.10028115069955333
          vf_explained_var: 0.9161065816879272
          vf_loss: 0.15391266309520882
    num_agent_steps_sampled: 6117552
    num_agent_steps_trained: 6117552
    num_steps_sampled: 6117552
    num_steps_trained: 611

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,612,96116.2,6117552,3.98427,11.41,-1.59,97.1359




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6127548
  custom_metrics: {}
  date: 2021-11-08_16-58-22
  done: false
  episode_len_mean: 95.125
  episode_media: {}
  episode_reward_max: 18.57999999999992
  episode_reward_mean: 3.7516346153846243
  episode_reward_min: -2.0700000000000007
  episodes_this_iter: 104
  episodes_total: 66424
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.120319952414586
          entropy_coeff: 0.01
          kl: 0.012886219191915483
          policy_loss: -0.05925345919612381
          total_loss: 0.13729405915563625
          vf_explained_var: 0.9189614057540894
          vf_loss: 0.18839429812267042
    num_agent_steps_sampled: 6127548
    num_agent_steps_trained: 6127548
    num_steps_sampled: 6127548
    num_steps_trained: 6127548
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,613,96268.9,6127548,3.75163,18.58,-2.07,95.125




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6137544
  custom_metrics: {}
  date: 2021-11-08_17-01-08
  done: false
  episode_len_mean: 95.15094339622641
  episode_media: {}
  episode_reward_max: 14.280000000000019
  episode_reward_mean: 3.47877358490567
  episode_reward_min: -1.5300000000000005
  episodes_this_iter: 106
  episodes_total: 66530
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.12180387403211
          entropy_coeff: 0.01
          kl: 0.013311748286458426
          policy_loss: -0.05419582814678677
          total_loss: 0.15469016740814998
          vf_explained_var: 0.9160909652709961
          vf_loss: 0.19977820693777923
    num_agent_steps_sampled: 6137544
    num_agent_steps_trained: 6137544
    num_steps_sampled: 6137544
    num_steps_trained: 61375

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,614,96435.3,6137544,3.47877,14.28,-1.53,95.1509




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6147540
  custom_metrics: {}
  date: 2021-11-08_17-04-02
  done: false
  episode_len_mean: 93.05607476635514
  episode_media: {}
  episode_reward_max: 14.440000000000014
  episode_reward_mean: 4.698785046728982
  episode_reward_min: -1.2500000000000004
  episodes_this_iter: 107
  episodes_total: 66637
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0946137798138156
          entropy_coeff: 0.01
          kl: 0.013183184017993552
          policy_loss: -0.055388165478650324
          total_loss: 0.1641604634734173
          vf_explained_var: 0.9237150549888611
          vf_loss: 0.21046182425039955
    num_agent_steps_sampled: 6147540
    num_agent_steps_trained: 6147540
    num_steps_sampled: 6147540
    num_steps_trained: 61

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,615,96609.6,6147540,4.69879,14.44,-1.25,93.0561




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6157536
  custom_metrics: {}
  date: 2021-11-08_17-06-50
  done: false
  episode_len_mean: 94.45283018867924
  episode_media: {}
  episode_reward_max: 13.88000000000002
  episode_reward_mean: 4.133113207547179
  episode_reward_min: -1.660000000000001
  episodes_this_iter: 106
  episodes_total: 66743
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.082654885043446
          entropy_coeff: 0.01
          kl: 0.011800768637631942
          policy_loss: -0.057999110040374294
          total_loss: 0.12190185900315897
          vf_explained_var: 0.9146696925163269
          vf_loss: 0.1738438916161784
    num_agent_steps_sampled: 6157536
    num_agent_steps_trained: 6157536
    num_steps_sampled: 6157536
    num_steps_trained: 61575

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,616,96777.2,6157536,4.13311,13.88,-1.66,94.4528




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6167532
  custom_metrics: {}
  date: 2021-11-08_17-09-25
  done: false
  episode_len_mean: 95.08571428571429
  episode_media: {}
  episode_reward_max: 16.329999999999945
  episode_reward_mean: 4.12495238095239
  episode_reward_min: -1.880000000000001
  episodes_this_iter: 105
  episodes_total: 66848
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.100297076987405
          entropy_coeff: 0.01
          kl: 0.013645882627848988
          policy_loss: -0.05515100805518719
          total_loss: 0.14720715572579932
          vf_explained_var: 0.923581063747406
          vf_loss: 0.192274108213874
    num_agent_steps_sampled: 6167532
    num_agent_steps_trained: 6167532
    num_steps_sampled: 6167532
    num_steps_trained: 6167532


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,617,96931.8,6167532,4.12495,16.33,-1.88,95.0857


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6177528
  custom_metrics: {}
  date: 2021-11-08_17-11-42
  done: false
  episode_len_mean: 97.45631067961165
  episode_media: {}
  episode_reward_max: 12.410000000000014
  episode_reward_mean: 4.155728155339816
  episode_reward_min: -1.940000000000001
  episodes_this_iter: 103
  episodes_total: 66951
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0950582385063172
          entropy_coeff: 0.01
          kl: 0.012684439993401549
          policy_loss: -0.059331819414296466
          total_loss: 0.1176136783292342
          vf_explained_var: 0.9330384731292725
          vf_loss: 0.16899933819460053
    num_agent_steps_sampled: 6177528
    num_agent_steps_trained: 6177528
    num_steps_sampled: 6177528
    num_steps_trained: 617

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,618,97069.2,6177528,4.15573,12.41,-1.94,97.4563




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6187524
  custom_metrics: {}
  date: 2021-11-08_17-14-24
  done: false
  episode_len_mean: 94.70476190476191
  episode_media: {}
  episode_reward_max: 13.710000000000022
  episode_reward_mean: 4.341238095238106
  episode_reward_min: -2.05
  episodes_this_iter: 105
  episodes_total: 67056
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1198540503143244
          entropy_coeff: 0.01
          kl: 0.01279845023206267
          policy_loss: -0.06043919415141528
          total_loss: 0.1144770239933561
          vf_explained_var: 0.9372344613075256
          vf_loss: 0.16695828733121992
    num_agent_steps_sampled: 6187524
    num_agent_steps_trained: 6187524
    num_steps_sampled: 6187524
    num_steps_trained: 6187524
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,619,97230.6,6187524,4.34124,13.71,-2.05,94.7048


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6197520
  custom_metrics: {}
  date: 2021-11-08_17-16-40
  done: false
  episode_len_mean: 97.41747572815534
  episode_media: {}
  episode_reward_max: 16.55999999999994
  episode_reward_mean: 4.434077669902924
  episode_reward_min: -0.9600000000000003
  episodes_this_iter: 103
  episodes_total: 67159
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1046169069078235
          entropy_coeff: 0.01
          kl: 0.012683676685013467
          policy_loss: -0.05780966350347058
          total_loss: 0.12389444047027928
          vf_explained_var: 0.9248759746551514
          vf_loss: 0.17385527105986054
    num_agent_steps_sampled: 6197520
    num_agent_steps_trained: 6197520
    num_steps_sampled: 6197520
    num_steps_trained: 619

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,620,97367.1,6197520,4.43408,16.56,-0.96,97.4175


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6207516
  custom_metrics: {}
  date: 2021-11-08_17-19-01
  done: false
  episode_len_mean: 97.3921568627451
  episode_media: {}
  episode_reward_max: 16.239999999999945
  episode_reward_mean: 3.707450980392166
  episode_reward_min: -1.9900000000000009
  episodes_this_iter: 102
  episodes_total: 67261
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.114752863818764
          entropy_coeff: 0.01
          kl: 0.014391086302995312
          policy_loss: -0.05431700977377402
          total_loss: 0.14236401968325177
          vf_explained_var: 0.9125540256500244
          vf_loss: 0.18504386358281485
    num_agent_steps_sampled: 6207516
    num_agent_steps_trained: 6207516
    num_steps_sampled: 6207516
    num_steps_trained: 6207

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,621,97508.2,6207516,3.70745,16.24,-1.99,97.3922




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6217512
  custom_metrics: {}
  date: 2021-11-08_17-21-43
  done: false
  episode_len_mean: 96.76699029126213
  episode_media: {}
  episode_reward_max: 14.620000000000015
  episode_reward_mean: 3.8441747572815643
  episode_reward_min: -0.8800000000000006
  episodes_this_iter: 103
  episodes_total: 67364
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1345779091883927
          entropy_coeff: 0.01
          kl: 0.012644858792888848
          policy_loss: -0.06091077424044538
          total_loss: 0.10646468593667333
          vf_explained_var: 0.9241423606872559
          vf_loss: 0.15991466956324557
    num_agent_steps_sampled: 6217512
    num_agent_steps_trained: 6217512
    num_steps_sampled: 6217512
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,622,97669.6,6217512,3.84417,14.62,-0.88,96.767




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6227508
  custom_metrics: {}
  date: 2021-11-08_17-24-12
  done: false
  episode_len_mean: 96.67619047619047
  episode_media: {}
  episode_reward_max: 13.800000000000024
  episode_reward_mean: 4.124095238095248
  episode_reward_min: 0.040000000000001874
  episodes_this_iter: 105
  episodes_total: 67469
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1263530381724367
          entropy_coeff: 0.01
          kl: 0.013068710621019119
          policy_loss: -0.060062190383258794
          total_loss: 0.1294089432288375
          vf_explained_var: 0.921183705329895
          vf_loss: 0.18096250601144684
    num_agent_steps_sampled: 6227508
    num_agent_steps_trained: 6227508
    num_steps_sampled: 6227508
    num_steps_trained: 62

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,623,97818.9,6227508,4.1241,13.8,0.04,96.6762




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6237504
  custom_metrics: {}
  date: 2021-11-08_17-26-45
  done: false
  episode_len_mean: 96.72549019607843
  episode_media: {}
  episode_reward_max: 14.250000000000018
  episode_reward_mean: 3.6206862745098145
  episode_reward_min: -1.5800000000000007
  episodes_this_iter: 102
  episodes_total: 67571
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.106545439337054
          entropy_coeff: 0.01
          kl: 0.013224135834274458
          policy_loss: -0.05853356032863132
          total_loss: 0.11266049646860005
          vf_explained_var: 0.9220457673072815
          vf_loss: 0.16213327585998127
    num_agent_steps_sampled: 6237504
    num_agent_steps_trained: 6237504
    num_steps_sampled: 6237504
    num_steps_trained: 62

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,624,97971.4,6237504,3.62069,14.25,-1.58,96.7255




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6247500
  custom_metrics: {}
  date: 2021-11-08_17-29-07
  done: false
  episode_len_mean: 99.78
  episode_media: {}
  episode_reward_max: 12.560000000000015
  episode_reward_mean: 3.5917000000000106
  episode_reward_min: -1.820000000000001
  episodes_this_iter: 100
  episodes_total: 67671
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1123040532454467
          entropy_coeff: 0.01
          kl: 0.012871281657375198
          policy_loss: -0.05876612865612802
          total_loss: 0.12491543687497958
          vf_explained_var: 0.9062916040420532
          vf_loss: 0.1754822165458503
    num_agent_steps_sampled: 6247500
    num_agent_steps_trained: 6247500
    num_steps_sampled: 6247500
    num_steps_trained: 6247500
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,625,98113.4,6247500,3.5917,12.56,-1.82,99.78


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6257496
  custom_metrics: {}
  date: 2021-11-08_17-31-19
  done: false
  episode_len_mean: 99.66336633663366
  episode_media: {}
  episode_reward_max: 13.810000000000022
  episode_reward_mean: 4.192178217821793
  episode_reward_min: -1.280000000000001
  episodes_this_iter: 101
  episodes_total: 67772
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0894555706244247
          entropy_coeff: 0.01
          kl: 0.01372775265603025
          policy_loss: -0.05575425863488872
          total_loss: 0.14292282059502143
          vf_explained_var: 0.9151705503463745
          vf_loss: 0.18829809666062014
    num_agent_steps_sampled: 6257496
    num_agent_steps_trained: 6257496
    num_steps_sampled: 6257496
    num_steps_trained: 6257

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,626,98245.4,6257496,4.19218,13.81,-1.28,99.6634




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6267492
  custom_metrics: {}
  date: 2021-11-08_17-33-57
  done: false
  episode_len_mean: 94.95283018867924
  episode_media: {}
  episode_reward_max: 16.78999999999994
  episode_reward_mean: 3.9749056603773676
  episode_reward_min: -1.0799999999999983
  episodes_this_iter: 106
  episodes_total: 67878
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1070522842244204
          entropy_coeff: 0.01
          kl: 0.012630216519284346
          policy_loss: -0.058772862075358374
          total_loss: 0.10754620911091821
          vf_explained_var: 0.9272792935371399
          vf_loss: 0.1586163807532981
    num_agent_steps_sampled: 6267492
    num_agent_steps_trained: 6267492
    num_steps_sampled: 6267492
    num_steps_trained: 62

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,627,98404,6267492,3.97491,16.79,-1.08,94.9528




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6277488
  custom_metrics: {}
  date: 2021-11-08_17-36-26
  done: false
  episode_len_mean: 96.10679611650485
  episode_media: {}
  episode_reward_max: 12.830000000000018
  episode_reward_mean: 4.20873786407768
  episode_reward_min: -1.3400000000000005
  episodes_this_iter: 103
  episodes_total: 67981
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1178475110958783
          entropy_coeff: 0.01
          kl: 0.012913935660341103
          policy_loss: -0.06081228264344808
          total_loss: 0.10247373833742916
          vf_explained_var: 0.9360601902008057
          vf_loss: 0.15504493564048893
    num_agent_steps_sampled: 6277488
    num_agent_steps_trained: 6277488
    num_steps_sampled: 6277488
    num_steps_trained: 627

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,628,98552.3,6277488,4.20874,12.83,-1.34,96.1068




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6287484
  custom_metrics: {}
  date: 2021-11-08_17-38-56
  done: false
  episode_len_mean: 95.0952380952381
  episode_media: {}
  episode_reward_max: 16.660000000000018
  episode_reward_mean: 4.342857142857154
  episode_reward_min: -1.1400000000000006
  episodes_this_iter: 105
  episodes_total: 68086
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.086567222256946
          entropy_coeff: 0.01
          kl: 0.012423595368888806
          policy_loss: -0.05834424680687933
          total_loss: 0.10558884851475302
          vf_explained_var: 0.9474406838417053
          vf_loss: 0.15649626353262072
    num_agent_steps_sampled: 6287484
    num_agent_steps_trained: 6287484
    num_steps_sampled: 6287484
    num_steps_trained: 6287

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,629,98703,6287484,4.34286,16.66,-1.14,95.0952


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6297480
  custom_metrics: {}
  date: 2021-11-08_17-41-11
  done: false
  episode_len_mean: 99.85148514851485
  episode_media: {}
  episode_reward_max: 13.95000000000002
  episode_reward_mean: 4.418316831683181
  episode_reward_min: -1.690000000000001
  episodes_this_iter: 101
  episodes_total: 68187
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.113251913714613
          entropy_coeff: 0.01
          kl: 0.01183459126027435
          policy_loss: -0.06069063166649932
          total_loss: 0.09622735132773717
          vf_explained_var: 0.9348501563072205
          vf_loss: 0.15108982310908983
    num_agent_steps_sampled: 6297480
    num_agent_steps_trained: 6297480
    num_steps_sampled: 6297480
    num_steps_trained: 629748

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,630,98837.8,6297480,4.41832,13.95,-1.69,99.8515


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6307476
  custom_metrics: {}
  date: 2021-11-08_17-43-29
  done: false
  episode_len_mean: 101.74
  episode_media: {}
  episode_reward_max: 16.139999999999926
  episode_reward_mean: 4.19860000000001
  episode_reward_min: -1.6999999999999982
  episodes_this_iter: 98
  episodes_total: 68285
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.118701749581557
          entropy_coeff: 0.01
          kl: 0.012393853615584182
          policy_loss: -0.05517072833151135
          total_loss: 0.13393953920652468
          vf_explained_var: 0.924375057220459
          vf_loss: 0.18206253659425892
    num_agent_steps_sampled: 6307476
    num_agent_steps_trained: 6307476
    num_steps_sampled: 6307476
    num_steps_trained: 6307476
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,631,98975.3,6307476,4.1986,16.14,-1.7,101.74




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6317472
  custom_metrics: {}
  date: 2021-11-08_17-45-48
  done: false
  episode_len_mean: 103.09
  episode_media: {}
  episode_reward_max: 14.910000000000016
  episode_reward_mean: 3.943500000000011
  episode_reward_min: -0.9300000000000006
  episodes_this_iter: 95
  episodes_total: 68380
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.123613566211146
          entropy_coeff: 0.01
          kl: 0.01311602963423951
          policy_loss: -0.05710262749980912
          total_loss: 0.1312737734654011
          vf_explained_var: 0.9243239164352417
          vf_loss: 0.1797325808642448
    num_agent_steps_sampled: 6317472
    num_agent_steps_trained: 6317472
    num_steps_sampled: 6317472
    num_steps_trained: 6317472
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,632,99114.7,6317472,3.9435,14.91,-0.93,103.09




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6327468
  custom_metrics: {}
  date: 2021-11-08_17-48-51
  done: false
  episode_len_mean: 97.28571428571429
  episode_media: {}
  episode_reward_max: 10.420000000000014
  episode_reward_mean: 3.9178095238095354
  episode_reward_min: -2.000000000000001
  episodes_this_iter: 105
  episodes_total: 68485
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.117346666715084
          entropy_coeff: 0.01
          kl: 0.012249413024605778
          policy_loss: -0.059656484259499445
          total_loss: 0.11878997089468643
          vf_explained_var: 0.9186502695083618
          vf_loss: 0.1717142253413669
    num_agent_steps_sampled: 6327468
    num_agent_steps_trained: 6327468
    num_steps_sampled: 6327468
    num_steps_trained: 632

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,633,99297.6,6327468,3.91781,10.42,-2,97.2857




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6337464
  custom_metrics: {}
  date: 2021-11-08_17-51-35
  done: false
  episode_len_mean: 97.46534653465346
  episode_media: {}
  episode_reward_max: 12.330000000000014
  episode_reward_mean: 3.97861386138615
  episode_reward_min: -1.610000000000001
  episodes_this_iter: 101
  episodes_total: 68586
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.142625557255541
          entropy_coeff: 0.01
          kl: 0.012654940707430857
          policy_loss: -0.05743785988316577
          total_loss: 0.11517724403076701
          vf_explained_var: 0.924217700958252
          vf_loss: 0.16521182093992193
    num_agent_steps_sampled: 6337464
    num_agent_steps_trained: 6337464
    num_steps_sampled: 6337464
    num_steps_trained: 633746

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,634,99461.4,6337464,3.97861,12.33,-1.61,97.4653




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6347460
  custom_metrics: {}
  date: 2021-11-08_17-53-58
  done: false
  episode_len_mean: 100.16
  episode_media: {}
  episode_reward_max: 17.030000000000012
  episode_reward_mean: 3.436900000000009
  episode_reward_min: -1.98
  episodes_this_iter: 100
  episodes_total: 68686
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.140519972438486
          entropy_coeff: 0.01
          kl: 0.0118519826825405
          policy_loss: -0.05691906264704517
          total_loss: 0.10251134436450199
          vf_explained_var: 0.9130175709724426
          vf_loss: 0.15383530862024453
    num_agent_steps_sampled: 6347460
    num_agent_steps_trained: 6347460
    num_steps_sampled: 6347460
    num_steps_trained: 6347460
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,635,99604.7,6347460,3.4369,17.03,-1.98,100.16


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6357456
  custom_metrics: {}
  date: 2021-11-08_17-56-10
  done: false
  episode_len_mean: 102.58
  episode_media: {}
  episode_reward_max: 14.14000000000002
  episode_reward_mean: 3.6482000000000117
  episode_reward_min: -1.6600000000000008
  episodes_this_iter: 98
  episodes_total: 68784
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1293492117498674
          entropy_coeff: 0.01
          kl: 0.012821190474202604
          policy_loss: -0.05517621328974636
          total_loss: 0.12006350140381827
          vf_explained_var: 0.9224836826324463
          vf_loss: 0.16732493098984416
    num_agent_steps_sampled: 6357456
    num_agent_steps_trained: 6357456
    num_steps_sampled: 6357456
    num_steps_trained: 6357456
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,636,99736,6357456,3.6482,14.14,-1.66,102.58




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6367452
  custom_metrics: {}
  date: 2021-11-08_17-58-35
  done: false
  episode_len_mean: 100.89
  episode_media: {}
  episode_reward_max: 11.25000000000001
  episode_reward_mean: 3.9467000000000114
  episode_reward_min: -1.3400000000000005
  episodes_this_iter: 100
  episodes_total: 68884
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1281569289346027
          entropy_coeff: 0.01
          kl: 0.011523526611302464
          policy_loss: -0.062103668850265505
          total_loss: 0.08576773337414886
          vf_explained_var: 0.9342751502990723
          vf_loss: 0.14290093541559246
    num_agent_steps_sampled: 6367452
    num_agent_steps_trained: 6367452
    num_steps_sampled: 6367452
    num_steps_trained: 6367452
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,637,99881.1,6367452,3.9467,11.25,-1.34,100.89




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6377448
  custom_metrics: {}
  date: 2021-11-08_18-01-16
  done: false
  episode_len_mean: 94.43809523809524
  episode_media: {}
  episode_reward_max: 12.930000000000016
  episode_reward_mean: 4.568380952380963
  episode_reward_min: -0.8100000000000006
  episodes_this_iter: 105
  episodes_total: 68989
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0799764674952907
          entropy_coeff: 0.01
          kl: 0.012693914616573142
          policy_loss: -0.05478779181567395
          total_loss: 0.12230415735234562
          vf_explained_var: 0.9285669326782227
          vf_loss: 0.16897338864863173
    num_agent_steps_sampled: 6377448
    num_agent_steps_trained: 6377448
    num_steps_sampled: 6377448
    num_steps_trained: 63

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,638,100042,6377448,4.56838,12.93,-0.81,94.4381




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6387444
  custom_metrics: {}
  date: 2021-11-08_18-04-05
  done: false
  episode_len_mean: 98.48039215686275
  episode_media: {}
  episode_reward_max: 10.780000000000017
  episode_reward_mean: 3.8176470588235403
  episode_reward_min: -1.4000000000000004
  episodes_this_iter: 102
  episodes_total: 69091
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.115636889649253
          entropy_coeff: 0.01
          kl: 0.012285372953158269
          policy_loss: -0.061657169609306714
          total_loss: 0.08872353640607852
          vf_explained_var: 0.9426584839820862
          vf_loss: 0.14354945882422548
    num_agent_steps_sampled: 6387444
    num_agent_steps_trained: 6387444
    num_steps_sampled: 6387444
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,639,100211,6387444,3.81765,10.78,-1.4,98.4804




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6397440
  custom_metrics: {}
  date: 2021-11-08_18-06-48
  done: false
  episode_len_mean: 97.00970873786407
  episode_media: {}
  episode_reward_max: 13.000000000000012
  episode_reward_mean: 4.215436893203895
  episode_reward_min: -1.2500000000000004
  episodes_this_iter: 103
  episodes_total: 69194
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0876201935303516
          entropy_coeff: 0.01
          kl: 0.012420178382621714
          policy_loss: -0.060282779534339394
          total_loss: 0.10409262371209697
          vf_explained_var: 0.936872124671936
          vf_loss: 0.15695688616261522
    num_agent_steps_sampled: 6397440
    num_agent_steps_trained: 6397440
    num_steps_sampled: 6397440
    num_steps_trained: 63

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,640,100374,6397440,4.21544,13,-1.25,97.0097


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6407436
  custom_metrics: {}
  date: 2021-11-08_18-09-04
  done: false
  episode_len_mean: 99.8019801980198
  episode_media: {}
  episode_reward_max: 14.380000000000017
  episode_reward_mean: 4.541089108910901
  episode_reward_min: -1.4800000000000004
  episodes_this_iter: 101
  episodes_total: 69295
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0969147297052237
          entropy_coeff: 0.01
          kl: 0.013175122926060654
          policy_loss: -0.056612046378163194
          total_loss: 0.12676224902622465
          vf_explained_var: 0.928215742111206
          vf_loss: 0.1743288649723698
    num_agent_steps_sampled: 6407436
    num_agent_steps_trained: 6407436
    num_steps_sampled: 6407436
    num_steps_trained: 6407

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,641,100510,6407436,4.54109,14.38,-1.48,99.802




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6417432
  custom_metrics: {}
  date: 2021-11-08_18-12-07
  done: false
  episode_len_mean: 97.02941176470588
  episode_media: {}
  episode_reward_max: 13.130000000000011
  episode_reward_mean: 3.7358823529411853
  episode_reward_min: -1.3400000000000005
  episodes_this_iter: 102
  episodes_total: 69397
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0973756942993553
          entropy_coeff: 0.01
          kl: 0.012060053976511814
          policy_loss: -0.05606928283874041
          total_loss: 0.12063362407935863
          vf_explained_var: 0.9262916445732117
          vf_loss: 0.17020235240427603
    num_agent_steps_sampled: 6417432
    num_agent_steps_trained: 6417432
    num_steps_sampled: 6417432
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,642,100693,6417432,3.73588,13.13,-1.34,97.0294




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6427428
  custom_metrics: {}
  date: 2021-11-08_18-14-55
  done: false
  episode_len_mean: 94.33962264150944
  episode_media: {}
  episode_reward_max: 15.190000000000012
  episode_reward_mean: 4.004905660377369
  episode_reward_min: -1.1800000000000004
  episodes_this_iter: 106
  episodes_total: 69503
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1128677553600737
          entropy_coeff: 0.01
          kl: 0.012689316536046337
          policy_loss: -0.0610414585385185
          total_loss: 0.1118976059791624
          vf_explained_var: 0.9296600818634033
          vf_loss: 0.1651598918880535
    num_agent_steps_sampled: 6427428
    num_agent_steps_trained: 6427428
    num_steps_sampled: 6427428
    num_steps_trained: 64274

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,643,100861,6427428,4.00491,15.19,-1.18,94.3396




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6437424
  custom_metrics: {}
  date: 2021-11-08_18-17-43
  done: false
  episode_len_mean: 94.95283018867924
  episode_media: {}
  episode_reward_max: 14.550000000000017
  episode_reward_mean: 4.04754716981133
  episode_reward_min: -1.830000000000001
  episodes_this_iter: 106
  episodes_total: 69609
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1087853924840942
          entropy_coeff: 0.01
          kl: 0.01197881673052555
          policy_loss: -0.05932621809247977
          total_loss: 0.1060474258218693
          vf_explained_var: 0.9361407160758972
          vf_loss: 0.15917225453015577
    num_agent_steps_sampled: 6437424
    num_agent_steps_trained: 6437424
    num_steps_sampled: 6437424
    num_steps_trained: 643742

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,644,101029,6437424,4.04755,14.55,-1.83,94.9528




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6447420
  custom_metrics: {}
  date: 2021-11-08_18-20-13
  done: false
  episode_len_mean: 95.22115384615384
  episode_media: {}
  episode_reward_max: 12.630000000000022
  episode_reward_mean: 4.233557692307703
  episode_reward_min: -0.8600000000000003
  episodes_this_iter: 104
  episodes_total: 69713
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0858237811642835
          entropy_coeff: 0.01
          kl: 0.012950469971812945
          policy_loss: -0.05882630376861646
          total_loss: 0.12210778764679901
          vf_explained_var: 0.9263931512832642
          vf_loss: 0.1722895385331323
    num_agent_steps_sampled: 6447420
    num_agent_steps_trained: 6447420
    num_steps_sampled: 6447420
    num_steps_trained: 644

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,645,101179,6447420,4.23356,12.63,-0.86,95.2212




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6457416
  custom_metrics: {}
  date: 2021-11-08_18-23-03
  done: false
  episode_len_mean: 97.03921568627452
  episode_media: {}
  episode_reward_max: 12.560000000000018
  episode_reward_mean: 4.288921568627463
  episode_reward_min: -1.3500000000000008
  episodes_this_iter: 102
  episodes_total: 69815
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1050481245049046
          entropy_coeff: 0.01
          kl: 0.01333033855723909
          policy_loss: -0.05739469797565387
          total_loss: 0.13259072835979044
          vf_explained_var: 0.9308678507804871
          vf_loss: 0.18066772976651405
    num_agent_steps_sampled: 6457416
    num_agent_steps_trained: 6457416
    num_steps_sampled: 6457416
    num_steps_trained: 645

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,646,101349,6457416,4.28892,12.56,-1.35,97.0392




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6467412
  custom_metrics: {}
  date: 2021-11-08_18-26-03
  done: false
  episode_len_mean: 96.59615384615384
  episode_media: {}
  episode_reward_max: 11.88000000000002
  episode_reward_mean: 3.826923076923087
  episode_reward_min: -1.6099999999999979
  episodes_this_iter: 104
  episodes_total: 69919
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.091619976565369
          entropy_coeff: 0.01
          kl: 0.01251823294174947
          policy_loss: -0.0557683488035686
          total_loss: 0.11929559134042415
          vf_explained_var: 0.937125563621521
          vf_loss: 0.1674620404298234
    num_agent_steps_sampled: 6467412
    num_agent_steps_trained: 6467412
    num_steps_sampled: 6467412
    num_steps_trained: 6467412


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,647,101529,6467412,3.82692,11.88,-1.61,96.5962


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6477408
  custom_metrics: {}
  date: 2021-11-08_18-28-19
  done: false
  episode_len_mean: 98.25490196078431
  episode_media: {}
  episode_reward_max: 18.889999999999983
  episode_reward_mean: 4.084019607843147
  episode_reward_min: -1.5400000000000005
  episodes_this_iter: 102
  episodes_total: 70021
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.089839349954556
          entropy_coeff: 0.01
          kl: 0.012614911502128828
          policy_loss: -0.06073410746951898
          total_loss: 0.12315531094900818
          vf_explained_var: 0.9235032200813293
          vf_loss: 0.17604946562240267
    num_agent_steps_sampled: 6477408
    num_agent_steps_trained: 6477408
    num_steps_sampled: 6477408
    num_steps_trained: 647

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,648,101665,6477408,4.08402,18.89,-1.54,98.2549




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6487404
  custom_metrics: {}
  date: 2021-11-08_18-31-18
  done: false
  episode_len_mean: 94.61320754716981
  episode_media: {}
  episode_reward_max: 14.790000000000015
  episode_reward_mean: 4.027358490566047
  episode_reward_min: -1.4400000000000008
  episodes_this_iter: 106
  episodes_total: 70127
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.103625282059368
          entropy_coeff: 0.01
          kl: 0.013175593282941418
          policy_loss: -0.059901044509795484
          total_loss: 0.12977767544519953
          vf_explained_var: 0.9241238236427307
          vf_loss: 0.18069932305564482
    num_agent_steps_sampled: 6487404
    num_agent_steps_trained: 6487404
    num_steps_sampled: 6487404
    num_steps_trained: 64

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,649,101844,6487404,4.02736,14.79,-1.44,94.6132




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6497400
  custom_metrics: {}
  date: 2021-11-08_18-33-58
  done: false
  episode_len_mean: 94.60377358490567
  episode_media: {}
  episode_reward_max: 14.770000000000016
  episode_reward_mean: 3.985283018867934
  episode_reward_min: -1.5400000000000007
  episodes_this_iter: 106
  episodes_total: 70233
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0809662480639597
          entropy_coeff: 0.01
          kl: 0.012960156338965927
          policy_loss: -0.05963583439747747
          total_loss: 0.1294702168291387
          vf_explained_var: 0.9108231663703918
          vf_loss: 0.1803908573049638
    num_agent_steps_sampled: 6497400
    num_agent_steps_trained: 6497400
    num_steps_sampled: 6497400
    num_steps_trained: 6497

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,650,102004,6497400,3.98528,14.77,-1.54,94.6038




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6507396
  custom_metrics: {}
  date: 2021-11-08_18-36-31
  done: false
  episode_len_mean: 92.43518518518519
  episode_media: {}
  episode_reward_max: 12.810000000000016
  episode_reward_mean: 3.3151851851851926
  episode_reward_min: -2.04
  episodes_this_iter: 108
  episodes_total: 70341
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.084811293467497
          entropy_coeff: 0.01
          kl: 0.012180233240774286
          policy_loss: -0.05955448549352268
          total_loss: 0.09584483133199123
          vf_explained_var: 0.9319225549697876
          vf_loss: 0.1484993351599536
    num_agent_steps_sampled: 6507396
    num_agent_steps_trained: 6507396
    num_steps_sampled: 6507396
    num_steps_trained: 6507396
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,651,102157,6507396,3.31519,12.81,-2.04,92.4352


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6517392
  custom_metrics: {}
  date: 2021-11-08_18-38-51
  done: false
  episode_len_mean: 95.45714285714286
  episode_media: {}
  episode_reward_max: 14.890000000000015
  episode_reward_mean: 4.106857142857153
  episode_reward_min: -1.4100000000000006
  episodes_this_iter: 105
  episodes_total: 70446
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0801804209366823
          entropy_coeff: 0.01
          kl: 0.011751431347077694
          policy_loss: -0.05845319085682814
          total_loss: 0.1154877215050734
          vf_explained_var: 0.9304966926574707
          vf_loss: 0.16797148517500132
    num_agent_steps_sampled: 6517392
    num_agent_steps_trained: 6517392
    num_steps_sampled: 6517392
    num_steps_trained: 651

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,652,102296,6517392,4.10686,14.89,-1.41,95.4571




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6527388
  custom_metrics: {}
  date: 2021-11-08_18-41-23
  done: false
  episode_len_mean: 95.57692307692308
  episode_media: {}
  episode_reward_max: 14.37000000000002
  episode_reward_mean: 3.523365384615394
  episode_reward_min: -1.6300000000000006
  episodes_this_iter: 104
  episodes_total: 70550
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0868625773323908
          entropy_coeff: 0.01
          kl: 0.012281185997203625
          policy_loss: -0.05871092019140975
          total_loss: 0.08944147542899109
          vf_explained_var: 0.9207701086997986
          vf_loss: 0.1410429445318241
    num_agent_steps_sampled: 6527388
    num_agent_steps_trained: 6527388
    num_steps_sampled: 6527388
    num_steps_trained: 6527

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,653,102448,6527388,3.52337,14.37,-1.63,95.5769




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6537384
  custom_metrics: {}
  date: 2021-11-08_18-44-26
  done: false
  episode_len_mean: 90.78181818181818
  episode_media: {}
  episode_reward_max: 12.01000000000002
  episode_reward_mean: 3.3275454545454624
  episode_reward_min: -1.5900000000000007
  episodes_this_iter: 110
  episodes_total: 70660
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0588995433261252
          entropy_coeff: 0.01
          kl: 0.01224345118614928
          policy_loss: -0.058824482902438725
          total_loss: 0.09984949406347851
          vf_explained_var: 0.9150777459144592
          vf_loss: 0.1513708602844013
    num_agent_steps_sampled: 6537384
    num_agent_steps_trained: 6537384
    num_steps_sampled: 6537384
    num_steps_trained: 653

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,654,102632,6537384,3.32755,12.01,-1.59,90.7818




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6547380
  custom_metrics: {}
  date: 2021-11-08_18-46-59
  done: false
  episode_len_mean: 92.4862385321101
  episode_media: {}
  episode_reward_max: 10.850000000000012
  episode_reward_mean: 3.811284403669734
  episode_reward_min: -1.4300000000000006
  episodes_this_iter: 109
  episodes_total: 70769
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0567455400768506
          entropy_coeff: 0.01
          kl: 0.012928914546814635
          policy_loss: -0.06118620125592774
          total_loss: 0.09575088823962416
          vf_explained_var: 0.9334475994110107
          vf_loss: 0.14805085984910402
    num_agent_steps_sampled: 6547380
    num_agent_steps_trained: 6547380
    num_steps_sampled: 6547380
    num_steps_trained: 654

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,655,102784,6547380,3.81128,10.85,-1.43,92.4862




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6557376
  custom_metrics: {}
  date: 2021-11-08_18-49-33
  done: false
  episode_len_mean: 91.1559633027523
  episode_media: {}
  episode_reward_max: 16.639999999999993
  episode_reward_mean: 3.522201834862393
  episode_reward_min: -1.4000000000000006
  episodes_this_iter: 109
  episodes_total: 70878
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0385164262902022
          entropy_coeff: 0.01
          kl: 0.013011471904397115
          policy_loss: -0.058942986672951114
          total_loss: 0.10595343878699673
          vf_explained_var: 0.9361138343811035
          vf_loss: 0.15563983065832374
    num_agent_steps_sampled: 6557376
    num_agent_steps_trained: 6557376
    num_steps_sampled: 6557376
    num_steps_trained: 65

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,656,102938,6557376,3.5222,16.64,-1.4,91.156




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6567372
  custom_metrics: {}
  date: 2021-11-08_18-52-31
  done: false
  episode_len_mean: 91.83636363636364
  episode_media: {}
  episode_reward_max: 18.54999999999993
  episode_reward_mean: 3.942181818181826
  episode_reward_min: -1.329999999999999
  episodes_this_iter: 110
  episodes_total: 70988
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0532801565960943
          entropy_coeff: 0.01
          kl: 0.013003104758408318
          policy_loss: -0.057055527441458316
          total_loss: 0.12806594168815094
          vf_explained_var: 0.9209141135215759
          vf_loss: 0.17603157256912982
    num_agent_steps_sampled: 6567372
    num_agent_steps_trained: 6567372
    num_steps_sampled: 6567372
    num_steps_trained: 656

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,657,103116,6567372,3.94218,18.55,-1.33,91.8364




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6577368
  custom_metrics: {}
  date: 2021-11-08_18-55-38
  done: false
  episode_len_mean: 89.9
  episode_media: {}
  episode_reward_max: 14.58000000000001
  episode_reward_mean: 4.314545454545464
  episode_reward_min: -1.5500000000000007
  episodes_this_iter: 110
  episodes_total: 71098
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0447927937548385
          entropy_coeff: 0.01
          kl: 0.012646400089128604
          policy_loss: -0.0579728945166382
          total_loss: 0.1486119363250004
          vf_explained_var: 0.9274337887763977
          vf_loss: 0.19822267837758756
    num_agent_steps_sampled: 6577368
    num_agent_steps_trained: 6577368
    num_steps_sampled: 6577368
    num_steps_trained: 6577368
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,658,103303,6577368,4.31455,14.58,-1.55,89.9




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6587364
  custom_metrics: {}
  date: 2021-11-08_18-58-27
  done: false
  episode_len_mean: 92.09174311926606
  episode_media: {}
  episode_reward_max: 17.819999999999936
  episode_reward_mean: 4.060733944954136
  episode_reward_min: -2.1300000000000003
  episodes_this_iter: 109
  episodes_total: 71207
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.072230127428332
          entropy_coeff: 0.01
          kl: 0.012788564812094581
          policy_loss: -0.05713152786095937
          total_loss: 0.13978665615153363
          vf_explained_var: 0.9369814395904541
          vf_loss: 0.18850653428170416
    num_agent_steps_sampled: 6587364
    num_agent_steps_trained: 6587364
    num_steps_sampled: 6587364
    num_steps_trained: 658

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,659,103472,6587364,4.06073,17.82,-2.13,92.0917




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6597360
  custom_metrics: {}
  date: 2021-11-08_19-01-01
  done: false
  episode_len_mean: 90.42727272727272
  episode_media: {}
  episode_reward_max: 14.680000000000016
  episode_reward_mean: 3.867000000000009
  episode_reward_min: -1.3500000000000005
  episodes_this_iter: 110
  episodes_total: 71317
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0488222814013817
          entropy_coeff: 0.01
          kl: 0.01176159091558544
          policy_loss: -0.05700748407751576
          total_loss: 0.10305491057901174
          vf_explained_var: 0.9361528158187866
          vf_loss: 0.1537562425224445
    num_agent_steps_sampled: 6597360
    num_agent_steps_trained: 6597360
    num_steps_sampled: 6597360
    num_steps_trained: 6597

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,660,103627,6597360,3.867,14.68,-1.35,90.4273




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6607356
  custom_metrics: {}
  date: 2021-11-08_19-04-20
  done: false
  episode_len_mean: 90.60909090909091
  episode_media: {}
  episode_reward_max: 14.67000000000002
  episode_reward_mean: 4.455090909090918
  episode_reward_min: -1.1400000000000008
  episodes_this_iter: 110
  episodes_total: 71427
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.045650729562482
          entropy_coeff: 0.01
          kl: 0.012647303857855619
          policy_loss: -0.05408687591552734
          total_loss: 0.1184812245468617
          vf_explained_var: 0.9318872690200806
          vf_loss: 0.1642124674609329
    num_agent_steps_sampled: 6607356
    num_agent_steps_trained: 6607356
    num_steps_sampled: 6607356
    num_steps_trained: 660735

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,661,103825,6607356,4.45509,14.67,-1.14,90.6091




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6617352
  custom_metrics: {}
  date: 2021-11-08_19-07-11
  done: false
  episode_len_mean: 89.89285714285714
  episode_media: {}
  episode_reward_max: 16.44999999999996
  episode_reward_mean: 3.442410714285722
  episode_reward_min: -1.6400000000000008
  episodes_this_iter: 112
  episodes_total: 71539
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.068394163225451
          entropy_coeff: 0.01
          kl: 0.011890022578185425
          policy_loss: -0.057687324008498436
          total_loss: 0.0862324164623124
          vf_explained_var: 0.9361966848373413
          vf_loss: 0.1375167246669149
    num_agent_steps_sampled: 6617352
    num_agent_steps_trained: 6617352
    num_steps_sampled: 6617352
    num_steps_trained: 66173

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,662,103997,6617352,3.44241,16.45,-1.64,89.8929




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6627348
  custom_metrics: {}
  date: 2021-11-08_19-09-57
  done: false
  episode_len_mean: 92.61111111111111
  episode_media: {}
  episode_reward_max: 14.33000000000002
  episode_reward_mean: 3.902500000000009
  episode_reward_min: -1.400000000000001
  episodes_this_iter: 108
  episodes_total: 71647
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0757303278670354
          entropy_coeff: 0.01
          kl: 0.013318360042443278
          policy_loss: -0.05744982808828354
          total_loss: 0.13740611928316135
          vf_explained_var: 0.908545732498169
          vf_loss: 0.18527236057716048
    num_agent_steps_sampled: 6627348
    num_agent_steps_trained: 6627348
    num_steps_sampled: 6627348
    num_steps_trained: 66273

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,663,104162,6627348,3.9025,14.33,-1.4,92.6111




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6637344
  custom_metrics: {}
  date: 2021-11-08_19-12-34
  done: false
  episode_len_mean: 91.0
  episode_media: {}
  episode_reward_max: 14.980000000000015
  episode_reward_mean: 4.571834862385331
  episode_reward_min: -1.0300000000000007
  episodes_this_iter: 109
  episodes_total: 71756
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.05213249371602
          entropy_coeff: 0.01
          kl: 0.01220780169017648
          policy_loss: -0.05517318899400978
          total_loss: 0.11117317619550432
          vf_explained_var: 0.946626603603363
          vf_loss: 0.15905679054399077
    num_agent_steps_sampled: 6637344
    num_agent_steps_trained: 6637344
    num_steps_sampled: 6637344
    num_steps_trained: 6637344
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,664,104319,6637344,4.57183,14.98,-1.03,91


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6647340
  custom_metrics: {}
  date: 2021-11-08_19-14-57
  done: false
  episode_len_mean: 93.25925925925925
  episode_media: {}
  episode_reward_max: 16.52000000000002
  episode_reward_mean: 3.7680555555555637
  episode_reward_min: -1.560000000000001
  episodes_this_iter: 108
  episodes_total: 71864
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0838170761736032
          entropy_coeff: 0.01
          kl: 0.012471645043517622
          policy_loss: -0.06009361004440958
          total_loss: 0.11231807237252212
          vf_explained_var: 0.9350976943969727
          vf_loss: 0.1648378856941803
    num_agent_steps_sampled: 6647340
    num_agent_steps_trained: 6647340
    num_steps_sampled: 6647340
    num_steps_trained: 6647

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,665,104462,6647340,3.76806,16.52,-1.56,93.2593




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6657336
  custom_metrics: {}
  date: 2021-11-08_19-17-43
  done: false
  episode_len_mean: 90.60909090909091
  episode_media: {}
  episode_reward_max: 10.750000000000014
  episode_reward_mean: 3.498272727272735
  episode_reward_min: -1.6100000000000005
  episodes_this_iter: 110
  episodes_total: 71974
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.039556627497714
          entropy_coeff: 0.01
          kl: 0.01131186565481237
          policy_loss: -0.055956165325374174
          total_loss: 0.11080751954617664
          vf_explained_var: 0.9292172789573669
          vf_loss: 0.16138940789601486
    num_agent_steps_sampled: 6657336
    num_agent_steps_trained: 6657336
    num_steps_sampled: 6657336
    num_steps_trained: 665

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,666,104628,6657336,3.49827,10.75,-1.61,90.6091




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6667332
  custom_metrics: {}
  date: 2021-11-08_19-20-33
  done: false
  episode_len_mean: 87.09482758620689
  episode_media: {}
  episode_reward_max: 10.890000000000013
  episode_reward_mean: 3.7704310344827676
  episode_reward_min: -1.1600000000000006
  episodes_this_iter: 116
  episodes_total: 72090
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0482063471761522
          entropy_coeff: 0.01
          kl: 0.011337388518588122
          policy_loss: -0.05829283062559672
          total_loss: 0.08309171022051293
          vf_explained_var: 0.9395766854286194
          vf_loss: 0.13603861533009853
    num_agent_steps_sampled: 6667332
    num_agent_steps_trained: 6667332
    num_steps_sampled: 6667332
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,667,104798,6667332,3.77043,10.89,-1.16,87.0948




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6677328
  custom_metrics: {}
  date: 2021-11-08_19-23-08
  done: false
  episode_len_mean: 91.13761467889908
  episode_media: {}
  episode_reward_max: 14.440000000000015
  episode_reward_mean: 3.713302752293586
  episode_reward_min: -1.4700000000000009
  episodes_this_iter: 109
  episodes_total: 72199
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.052867533916082
          entropy_coeff: 0.01
          kl: 0.012989955181422444
          policy_loss: -0.05958310084767703
          total_loss: 0.11396348226624421
          vf_explained_var: 0.9297605156898499
          vf_loss: 0.1644825162143152
    num_agent_steps_sampled: 6677328
    num_agent_steps_trained: 6677328
    num_steps_sampled: 6677328
    num_steps_trained: 6677

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,668,104953,6677328,3.7133,14.44,-1.47,91.1376


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6687324
  custom_metrics: {}
  date: 2021-11-08_19-25-34
  done: false
  episode_len_mean: 91.50925925925925
  episode_media: {}
  episode_reward_max: 20.54999999999999
  episode_reward_mean: 4.513796296296305
  episode_reward_min: -1.2799999999999978
  episodes_this_iter: 108
  episodes_total: 72307
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0488787742761465
          entropy_coeff: 0.01
          kl: 0.012592684303330083
          policy_loss: -0.05896738350598348
          total_loss: 0.1251894233159275
          vf_explained_var: 0.9376405477523804
          vf_loss: 0.1759578850120306
    num_agent_steps_sampled: 6687324
    num_agent_steps_trained: 6687324
    num_steps_sampled: 6687324
    num_steps_trained: 66873

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,669,105099,6687324,4.5138,20.55,-1.28,91.5093




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6697320
  custom_metrics: {}
  date: 2021-11-08_19-28-13
  done: false
  episode_len_mean: 88.87610619469027
  episode_media: {}
  episode_reward_max: 12.570000000000018
  episode_reward_mean: 3.9998230088495665
  episode_reward_min: -1.5000000000000002
  episodes_this_iter: 113
  episodes_total: 72420
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0482296967098854
          entropy_coeff: 0.01
          kl: 0.011904552048491007
          policy_loss: -0.06024129504385667
          total_loss: 0.10008947127458886
          vf_explained_var: 0.9348509907722473
          vf_loss: 0.15369300558749172
    num_agent_steps_sampled: 6697320
    num_agent_steps_trained: 6697320
    num_steps_sampled: 6697320
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,670,105258,6697320,3.99982,12.57,-1.5,88.8761




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6707316
  custom_metrics: {}
  date: 2021-11-08_19-31-13
  done: false
  episode_len_mean: 87.34782608695652
  episode_media: {}
  episode_reward_max: 14.410000000000014
  episode_reward_mean: 4.022086956521747
  episode_reward_min: -1.3800000000000006
  episodes_this_iter: 115
  episodes_total: 72535
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.043817471744668
          entropy_coeff: 0.01
          kl: 0.01204184463728615
          policy_loss: -0.06041968185932208
          total_loss: 0.10922047789407592
          vf_explained_var: 0.93963223695755
          vf_loss: 0.16264550572531855
    num_agent_steps_sampled: 6707316
    num_agent_steps_trained: 6707316
    num_steps_sampled: 6707316
    num_steps_trained: 670731

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,671,105438,6707316,4.02209,14.41,-1.38,87.3478




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6717312
  custom_metrics: {}
  date: 2021-11-08_19-33-50
  done: false
  episode_len_mean: 90.85454545454546
  episode_media: {}
  episode_reward_max: 14.570000000000011
  episode_reward_mean: 4.538000000000009
  episode_reward_min: -1.5400000000000011
  episodes_this_iter: 110
  episodes_total: 72645
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.051485375359527
          entropy_coeff: 0.01
          kl: 0.013518658704588902
          policy_loss: -0.05637620610431728
          total_loss: 0.12390483755490973
          vf_explained_var: 0.9349269270896912
          vf_loss: 0.16999870168092923
    num_agent_steps_sampled: 6717312
    num_agent_steps_trained: 6717312
    num_steps_sampled: 6717312
    num_steps_trained: 671

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,672,105594,6717312,4.538,14.57,-1.54,90.8545




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6727308
  custom_metrics: {}
  date: 2021-11-08_19-36-41
  done: false
  episode_len_mean: 89.29464285714286
  episode_media: {}
  episode_reward_max: 16.89000000000002
  episode_reward_mean: 4.4853571428571515
  episode_reward_min: -1.4100000000000006
  episodes_this_iter: 112
  episodes_total: 72757
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0140828080666373
          entropy_coeff: 0.01
          kl: 0.01305033847750287
          policy_loss: -0.057715468684959616
          total_loss: 0.13676678400295667
          vf_explained_var: 0.9386956691741943
          vf_loss: 0.18489277753978967
    num_agent_steps_sampled: 6727308
    num_agent_steps_trained: 6727308
    num_steps_sampled: 6727308
    num_steps_trained: 67

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,673,105765,6727308,4.48536,16.89,-1.41,89.2946




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6737304
  custom_metrics: {}
  date: 2021-11-08_19-39-17
  done: false
  episode_len_mean: 90.47272727272727
  episode_media: {}
  episode_reward_max: 14.820000000000011
  episode_reward_mean: 4.112636363636372
  episode_reward_min: -0.9800000000000005
  episodes_this_iter: 110
  episodes_total: 72867
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.037155901672494
          entropy_coeff: 0.01
          kl: 0.012201254191969848
          policy_loss: -0.054956895652680825
          total_loss: 0.12824161612293405
          vf_explained_var: 0.9315275549888611
          vf_loss: 0.17577408788582452
    num_agent_steps_sampled: 6737304
    num_agent_steps_trained: 6737304
    num_steps_sampled: 6737304
    num_steps_trained: 67

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,674,105922,6737304,4.11264,14.82,-0.98,90.4727




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6747300
  custom_metrics: {}
  date: 2021-11-08_19-41-54
  done: false
  episode_len_mean: 89.625
  episode_media: {}
  episode_reward_max: 14.620000000000019
  episode_reward_mean: 3.841785714285723
  episode_reward_min: -1.1700000000000006
  episodes_this_iter: 112
  episodes_total: 72979
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0255638871437465
          entropy_coeff: 0.01
          kl: 0.011488027854503693
          policy_loss: -0.05605028050386498
          total_loss: 0.10603025279079492
          vf_explained_var: 0.9226750135421753
          vf_loss: 0.15616500603401254
    num_agent_steps_sampled: 6747300
    num_agent_steps_trained: 6747300
    num_steps_sampled: 6747300
    num_steps_trained: 6747300
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,675,106079,6747300,3.84179,14.62,-1.17,89.625




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6757296
  custom_metrics: {}
  date: 2021-11-08_19-44-47
  done: false
  episode_len_mean: 89.84821428571429
  episode_media: {}
  episode_reward_max: 16.44999999999995
  episode_reward_mean: 4.28642857142858
  episode_reward_min: -1.3800000000000008
  episodes_this_iter: 112
  episodes_total: 73091
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.035893272029029
          entropy_coeff: 0.01
          kl: 0.012408182969954948
          policy_loss: -0.05811852166731643
          total_loss: 0.11473704184660226
          vf_explained_var: 0.9352589249610901
          vf_loss: 0.16494710441901644
    num_agent_steps_sampled: 6757296
    num_agent_steps_trained: 6757296
    num_steps_sampled: 6757296
    num_steps_trained: 67572

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,676,106251,6757296,4.28643,16.45,-1.38,89.8482




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6767292
  custom_metrics: {}
  date: 2021-11-08_19-47-26
  done: false
  episode_len_mean: 91.12844036697248
  episode_media: {}
  episode_reward_max: 14.820000000000016
  episode_reward_mean: 3.611834862385329
  episode_reward_min: -1.5200000000000005
  episodes_this_iter: 109
  episodes_total: 73200
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0346881648414157
          entropy_coeff: 0.01
          kl: 0.011596301816615903
          policy_loss: -0.06048599410897646
          total_loss: 0.088369267522238
          vf_explained_var: 0.9366360902786255
          vf_loss: 0.1427843184520801
    num_agent_steps_sampled: 6767292
    num_agent_steps_trained: 6767292
    num_steps_sampled: 6767292
    num_steps_trained: 67672

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,677,106410,6767292,3.61183,14.82,-1.52,91.1284




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6777288
  custom_metrics: {}
  date: 2021-11-08_19-50-08
  done: false
  episode_len_mean: 92.27777777777777
  episode_media: {}
  episode_reward_max: 12.790000000000013
  episode_reward_mean: 4.669907407407418
  episode_reward_min: -1.2800000000000002
  episodes_this_iter: 108
  episodes_total: 73308
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0359113259193227
          entropy_coeff: 0.01
          kl: 0.012223492169840108
          policy_loss: -0.060205752736864945
          total_loss: 0.11451310245559002
          vf_explained_var: 0.9384337067604065
          vf_loss: 0.16723132592458756
    num_agent_steps_sampled: 6777288
    num_agent_steps_trained: 6777288
    num_steps_sampled: 6777288
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,678,106572,6777288,4.66991,12.79,-1.28,92.2778




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6787284
  custom_metrics: {}
  date: 2021-11-08_19-53-07
  done: false
  episode_len_mean: 91.14678899082568
  episode_media: {}
  episode_reward_max: 18.16999999999995
  episode_reward_mean: 4.174678899082577
  episode_reward_min: -1.4600000000000004
  episodes_this_iter: 109
  episodes_total: 73417
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0409505027991073
          entropy_coeff: 0.01
          kl: 0.01365069693945125
          policy_loss: -0.05757736635641155
          total_loss: 0.11907471570098757
          vf_explained_var: 0.9348528385162354
          vf_loss: 0.1659635922210848
    num_agent_steps_sampled: 6787284
    num_agent_steps_trained: 6787284
    num_steps_sampled: 6787284
    num_steps_trained: 67872

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,679,106752,6787284,4.17468,18.17,-1.46,91.1468




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6797280
  custom_metrics: {}
  date: 2021-11-08_19-55-46
  done: false
  episode_len_mean: 90.82882882882883
  episode_media: {}
  episode_reward_max: 18.249999999999932
  episode_reward_mean: 4.287927927927936
  episode_reward_min: -0.7300000000000006
  episodes_this_iter: 111
  episodes_total: 73528
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.037274308489938
          entropy_coeff: 0.01
          kl: 0.012596058579502512
          policy_loss: -0.06029103222605573
          total_loss: 0.12158343871959891
          vf_explained_var: 0.9329907298088074
          vf_loss: 0.17355181750897158
    num_agent_steps_sampled: 6797280
    num_agent_steps_trained: 6797280
    num_steps_sampled: 6797280
    num_steps_trained: 679

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,680,106911,6797280,4.28793,18.25,-0.73,90.8288


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6807276
  custom_metrics: {}
  date: 2021-11-08_19-58-10
  done: false
  episode_len_mean: 92.4954128440367
  episode_media: {}
  episode_reward_max: 16.479999999999933
  episode_reward_mean: 3.7078899082568886
  episode_reward_min: -1.4200000000000004
  episodes_this_iter: 109
  episodes_total: 73637
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.068667156268389
          entropy_coeff: 0.01
          kl: 0.012161456844881176
          policy_loss: -0.05299088796361899
          total_loss: 0.1241942315752435
          vf_explained_var: 0.9226062893867493
          vf_loss: 0.17016647160690054
    num_agent_steps_sampled: 6807276
    num_agent_steps_trained: 6807276
    num_steps_sampled: 6807276
    num_steps_trained: 6807

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,681,107054,6807276,3.70789,16.48,-1.42,92.4954




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6817272
  custom_metrics: {}
  date: 2021-11-08_20-01-05
  done: false
  episode_len_mean: 92.95327102803738
  episode_media: {}
  episode_reward_max: 13.03000000000001
  episode_reward_mean: 4.2073831775701045
  episode_reward_min: -1.560000000000001
  episodes_this_iter: 107
  episodes_total: 73744
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0676602309585634
          entropy_coeff: 0.01
          kl: 0.011933510291809916
          policy_loss: -0.05963562399339981
          total_loss: 0.11888500071870975
          vf_explained_var: 0.9285861253738403
          vf_loss: 0.1720111978534832
    num_agent_steps_sampled: 6817272
    num_agent_steps_trained: 6817272
    num_steps_sampled: 6817272
    num_steps_trained: 6817

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,682,107230,6817272,4.20738,13.03,-1.56,92.9533




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6827268
  custom_metrics: {}
  date: 2021-11-08_20-03-42
  done: false
  episode_len_mean: 91.53703703703704
  episode_media: {}
  episode_reward_max: 13.080000000000014
  episode_reward_mean: 3.8485185185185276
  episode_reward_min: -1.3300000000000003
  episodes_this_iter: 108
  episodes_total: 73852
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0524885448635133
          entropy_coeff: 0.01
          kl: 0.01171596239164418
          policy_loss: -0.05844462987067353
          total_loss: 0.08984246070170378
          vf_explained_var: 0.9414390325546265
          vf_loss: 0.1421215483600385
    num_agent_steps_sampled: 6827268
    num_agent_steps_trained: 6827268
    num_steps_sampled: 6827268
    num_steps_trained: 682

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,683,107386,6827268,3.84852,13.08,-1.33,91.537




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6837264
  custom_metrics: {}
  date: 2021-11-08_20-06-16
  done: false
  episode_len_mean: 91.8256880733945
  episode_media: {}
  episode_reward_max: 12.750000000000014
  episode_reward_mean: 4.0365137614679
  episode_reward_min: -1.2100000000000006
  episodes_this_iter: 109
  episodes_total: 73961
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.04130304195942
          entropy_coeff: 0.01
          kl: 0.012998302929249287
          policy_loss: -0.055972027014463376
          total_loss: 0.13435903646561326
          vf_explained_var: 0.9238901138305664
          vf_loss: 0.18113233413005997
    num_agent_steps_sampled: 6837264
    num_agent_steps_trained: 6837264
    num_steps_sampled: 6837264
    num_steps_trained: 683726

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,684,107540,6837264,4.03651,12.75,-1.21,91.8257




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6847260
  custom_metrics: {}
  date: 2021-11-08_20-09-13
  done: false
  episode_len_mean: 91.33944954128441
  episode_media: {}
  episode_reward_max: 13.98000000000002
  episode_reward_mean: 4.074311926605513
  episode_reward_min: -1.0200000000000005
  episodes_this_iter: 109
  episodes_total: 74070
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.070141843852834
          entropy_coeff: 0.01
          kl: 0.011833602611334008
          policy_loss: -0.059027267752103825
          total_loss: 0.11018094675074148
          vf_explained_var: 0.9345974326133728
          vf_loss: 0.16295120590645024
    num_agent_steps_sampled: 6847260
    num_agent_steps_trained: 6847260
    num_steps_sampled: 6847260
    num_steps_trained: 684

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,685,107717,6847260,4.07431,13.98,-1.02,91.3394




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6857256
  custom_metrics: {}
  date: 2021-11-08_20-12-29
  done: false
  episode_len_mean: 88.3859649122807
  episode_media: {}
  episode_reward_max: 14.720000000000013
  episode_reward_mean: 3.9240350877193064
  episode_reward_min: -1.4700000000000006
  episodes_this_iter: 114
  episodes_total: 74184
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.039184144215706
          entropy_coeff: 0.01
          kl: 0.01183662830792475
          policy_loss: -0.056045123764401315
          total_loss: 0.10639969117732512
          vf_explained_var: 0.9308170080184937
          vf_loss: 0.15587133742295778
    num_agent_steps_sampled: 6857256
    num_agent_steps_trained: 6857256
    num_steps_sampled: 6857256
    num_steps_trained: 685

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,686,107913,6857256,3.92404,14.72,-1.47,88.386




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6867252
  custom_metrics: {}
  date: 2021-11-08_20-15-03
  done: false
  episode_len_mean: 92.35185185185185
  episode_media: {}
  episode_reward_max: 18.049999999999944
  episode_reward_mean: 3.9693518518518602
  episode_reward_min: -1.5800000000000005
  episodes_this_iter: 108
  episodes_total: 74292
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0784068321570373
          entropy_coeff: 0.01
          kl: 0.012350330048570304
          policy_loss: -0.05814981078649433
          total_loss: 0.12251566875821505
          vf_explained_var: 0.9372898936271667
          vf_loss: 0.1733139520463271
    num_agent_steps_sampled: 6867252
    num_agent_steps_trained: 6867252
    num_steps_sampled: 6867252
    num_steps_trained: 68

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,687,108068,6867252,3.96935,18.05,-1.58,92.3519


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6877248
  custom_metrics: {}
  date: 2021-11-08_20-17-29
  done: false
  episode_len_mean: 91.90825688073394
  episode_media: {}
  episode_reward_max: 10.630000000000017
  episode_reward_mean: 3.8855045871559724
  episode_reward_min: -1.2000000000000004
  episodes_this_iter: 109
  episodes_total: 74401
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0711985797963592
          entropy_coeff: 0.01
          kl: 0.012304336101224084
          policy_loss: -0.059633877695912225
          total_loss: 0.09972082011751894
          vf_explained_var: 0.9303794503211975
          vf_loss: 0.15203586667139307
    num_agent_steps_sampled: 6877248
    num_agent_steps_trained: 6877248
    num_steps_sampled: 6877248
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,688,108213,6877248,3.8855,10.63,-1.2,91.9083




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6887244
  custom_metrics: {}
  date: 2021-11-08_20-20-00
  done: false
  episode_len_mean: 92.8785046728972
  episode_media: {}
  episode_reward_max: 20.29999999999995
  episode_reward_mean: 4.53682242990655
  episode_reward_min: -1.5100000000000007
  episodes_this_iter: 107
  episodes_total: 74508
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0476911690500046
          entropy_coeff: 0.01
          kl: 0.012568511394608904
          policy_loss: -0.05283870957871405
          total_loss: 0.14499319123629575
          vf_explained_var: 0.9354273080825806
          vf_loss: 0.189676171899415
    num_agent_steps_sampled: 6887244
    num_agent_steps_trained: 6887244
    num_steps_sampled: 6887244
    num_steps_trained: 6887244

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,689,108364,6887244,4.53682,20.3,-1.51,92.8785




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6897240
  custom_metrics: {}
  date: 2021-11-08_20-23-06
  done: false
  episode_len_mean: 91.10909090909091
  episode_media: {}
  episode_reward_max: 13.030000000000015
  episode_reward_mean: 4.252090909090919
  episode_reward_min: -1.2600000000000005
  episodes_this_iter: 110
  episodes_total: 74618
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0659172255768734
          entropy_coeff: 0.01
          kl: 0.011829176244551591
          policy_loss: -0.06236260883812594
          total_loss: 0.08996264150557227
          vf_explained_var: 0.9362631440162659
          vf_loss: 0.14603607952276357
    num_agent_steps_sampled: 6897240
    num_agent_steps_trained: 6897240
    num_steps_sampled: 6897240
    num_steps_trained: 68

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,690,108550,6897240,4.25209,13.03,-1.26,91.1091




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6907236
  custom_metrics: {}
  date: 2021-11-08_20-25-41
  done: false
  episode_len_mean: 94.07476635514018
  episode_media: {}
  episode_reward_max: 16.360000000000003
  episode_reward_mean: 4.729252336448609
  episode_reward_min: -1.1100000000000003
  episodes_this_iter: 107
  episodes_total: 74725
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.052806007148873
          entropy_coeff: 0.01
          kl: 0.013765518284860483
          policy_loss: -0.054198771845708545
          total_loss: 0.14232415941376717
          vf_explained_var: 0.9492300152778625
          vf_loss: 0.185691417947921
    num_agent_steps_sampled: 6907236
    num_agent_steps_trained: 6907236
    num_steps_sampled: 6907236
    num_steps_trained: 6907

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,691,108705,6907236,4.72925,16.36,-1.11,94.0748




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6917232
  custom_metrics: {}
  date: 2021-11-08_20-28-26
  done: false
  episode_len_mean: 94.82857142857142
  episode_media: {}
  episode_reward_max: 16.779999999999966
  episode_reward_mean: 4.39942857142858
  episode_reward_min: -1.1900000000000008
  episodes_this_iter: 105
  episodes_total: 74830
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.1018796428655966
          entropy_coeff: 0.01
          kl: 0.012016823034034499
          policy_loss: -0.05808990828087952
          total_loss: 0.10350568829677426
          vf_explained_var: 0.9475623369216919
          vf_loss: 0.15523856746462675
    num_agent_steps_sampled: 6917232
    num_agent_steps_trained: 6917232
    num_steps_sampled: 6917232
    num_steps_trained: 691

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,692,108870,6917232,4.39943,16.78,-1.19,94.8286




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6927228
  custom_metrics: {}
  date: 2021-11-08_20-31-03
  done: false
  episode_len_mean: 93.33644859813084
  episode_media: {}
  episode_reward_max: 14.980000000000013
  episode_reward_mean: 4.042616822429916
  episode_reward_min: -1.6100000000000008
  episodes_this_iter: 107
  episodes_total: 74937
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0778777345632897
          entropy_coeff: 0.01
          kl: 0.01259875306462083
          policy_loss: -0.05320439901139237
          total_loss: 0.11912307598604224
          vf_explained_var: 0.9328851103782654
          vf_loss: 0.16440471736793844
    num_agent_steps_sampled: 6927228
    num_agent_steps_trained: 6927228
    num_steps_sampled: 6927228
    num_steps_trained: 692

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,693,109027,6927228,4.04262,14.98,-1.61,93.3364




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6937224
  custom_metrics: {}
  date: 2021-11-08_20-33-40
  done: false
  episode_len_mean: 91.8348623853211
  episode_media: {}
  episode_reward_max: 15.849999999999998
  episode_reward_mean: 3.634495412844046
  episode_reward_min: -1.5400000000000007
  episodes_this_iter: 109
  episodes_total: 75046
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.047481531668932
          entropy_coeff: 0.01
          kl: 0.013903167513331028
          policy_loss: -0.05472711958946326
          total_loss: 0.1394389888128409
          vf_explained_var: 0.9281265735626221
          vf_loss: 0.1829677695647264
    num_agent_steps_sampled: 6937224
    num_agent_steps_trained: 6937224
    num_steps_sampled: 6937224
    num_steps_trained: 693722

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,694,109184,6937224,3.6345,15.85,-1.54,91.8349




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6947220
  custom_metrics: {}
  date: 2021-11-08_20-36-15
  done: false
  episode_len_mean: 92.43518518518519
  episode_media: {}
  episode_reward_max: 12.340000000000018
  episode_reward_mean: 4.345092592592603
  episode_reward_min: -1.1400000000000003
  episodes_this_iter: 108
  episodes_total: 75154
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.045051695852198
          entropy_coeff: 0.01
          kl: 0.012072848904706479
          policy_loss: -0.06071393818904956
          total_loss: 0.09987445937421842
          vf_explained_var: 0.9392983913421631
          vf_loss: 0.15353545501477953
    num_agent_steps_sampled: 6947220
    num_agent_steps_trained: 6947220
    num_steps_sampled: 6947220
    num_steps_trained: 694

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,695,109339,6947220,4.34509,12.34,-1.14,92.4352




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6957216
  custom_metrics: {}
  date: 2021-11-08_20-38-44
  done: false
  episode_len_mean: 95.12380952380953
  episode_media: {}
  episode_reward_max: 12.28000000000001
  episode_reward_mean: 3.6036190476190573
  episode_reward_min: -1.5000000000000007
  episodes_this_iter: 105
  episodes_total: 75259
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0772543313156846
          entropy_coeff: 0.01
          kl: 0.011220346337143411
          policy_loss: -0.05956322086067536
          total_loss: 0.06237604797650606
          vf_explained_var: 0.9465705752372742
          vf_loss: 0.11715045890364892
    num_agent_steps_sampled: 6957216
    num_agent_steps_trained: 6957216
    num_steps_sampled: 6957216
    num_steps_trained: 69

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,696,109488,6957216,3.60362,12.28,-1.5,95.1238




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6967212
  custom_metrics: {}
  date: 2021-11-08_20-41-32
  done: false
  episode_len_mean: 90.1981981981982
  episode_media: {}
  episode_reward_max: 13.020000000000012
  episode_reward_mean: 4.177027027027036
  episode_reward_min: -2.000000000000001
  episodes_this_iter: 111
  episodes_total: 75370
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0521521912680734
          entropy_coeff: 0.01
          kl: 0.012068688802144213
          policy_loss: -0.055395343695950304
          total_loss: 0.10368185134079212
          vf_explained_var: 0.9367672801017761
          vf_loss: 0.15210473464053664
    num_agent_steps_sampled: 6967212
    num_agent_steps_trained: 6967212
    num_steps_sampled: 6967212
    num_steps_trained: 696

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,697,109656,6967212,4.17703,13.02,-2,90.1982




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6977208
  custom_metrics: {}
  date: 2021-11-08_20-44-07
  done: false
  episode_len_mean: 91.74074074074075
  episode_media: {}
  episode_reward_max: 16.46999999999998
  episode_reward_mean: 4.245000000000009
  episode_reward_min: -1.3900000000000003
  episodes_this_iter: 108
  episodes_total: 75478
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0568164109164835
          entropy_coeff: 0.01
          kl: 0.012498183959199913
          policy_loss: -0.062109217569868785
          total_loss: 0.10904229821390513
          vf_explained_var: 0.9420475959777832
          vf_loss: 0.1632472539320588
    num_agent_steps_sampled: 6977208
    num_agent_steps_trained: 6977208
    num_steps_sampled: 6977208
    num_steps_trained: 697

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,698,109811,6977208,4.245,16.47,-1.39,91.7407




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6987204
  custom_metrics: {}
  date: 2021-11-08_20-46-51
  done: false
  episode_len_mean: 92.13636363636364
  episode_media: {}
  episode_reward_max: 12.980000000000011
  episode_reward_mean: 4.218909090909099
  episode_reward_min: -1.3600000000000005
  episodes_this_iter: 110
  episodes_total: 75588
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.061504519189525
          entropy_coeff: 0.01
          kl: 0.011798158558710548
          policy_loss: -0.05795882409836492
          total_loss: 0.09391871327932319
          vf_explained_var: 0.940834105014801
          vf_loss: 0.14561490265684376
    num_agent_steps_sampled: 6987204
    num_agent_steps_trained: 6987204
    num_steps_sampled: 6987204
    num_steps_trained: 6987

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,699,109975,6987204,4.21891,12.98,-1.36,92.1364




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 6997200
  custom_metrics: {}
  date: 2021-11-08_20-49-43
  done: false
  episode_len_mean: 90.72727272727273
  episode_media: {}
  episode_reward_max: 14.770000000000014
  episode_reward_mean: 4.362272727272736
  episode_reward_min: -1.4800000000000004
  episodes_this_iter: 110
  episodes_total: 75698
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0590232199073855
          entropy_coeff: 0.01
          kl: 0.012543860336128037
          policy_loss: -0.0570206905532087
          total_loss: 0.11911947055377511
          vf_explained_var: 0.9425324201583862
          vf_loss: 0.16815391037867874
    num_agent_steps_sampled: 6997200
    num_agent_steps_trained: 6997200
    num_steps_sampled: 6997200
    num_steps_trained: 699

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,700,110147,6997200,4.36227,14.77,-1.48,90.7273




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7007196
  custom_metrics: {}
  date: 2021-11-08_20-52-19
  done: false
  episode_len_mean: 92.0
  episode_media: {}
  episode_reward_max: 14.540000000000015
  episode_reward_mean: 3.6433944954128523
  episode_reward_min: -2.0100000000000002
  episodes_this_iter: 109
  episodes_total: 75807
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0541772298323804
          entropy_coeff: 0.01
          kl: 0.012951957014781718
          policy_loss: -0.05523140471524153
          total_loss: 0.12367356247626818
          vf_explained_var: 0.9257563948631287
          vf_loss: 0.16994056191581947
    num_agent_steps_sampled: 7007196
    num_agent_steps_trained: 7007196
    num_steps_sampled: 7007196
    num_steps_trained: 7007196
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,701,110303,7007196,3.64339,14.54,-2.01,92




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7017192
  custom_metrics: {}
  date: 2021-11-08_20-55-15
  done: false
  episode_len_mean: 91.30909090909091
  episode_media: {}
  episode_reward_max: 14.10000000000002
  episode_reward_mean: 3.576272727272736
  episode_reward_min: -1.5000000000000004
  episodes_this_iter: 110
  episodes_total: 75917
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.089961229226528
          entropy_coeff: 0.01
          kl: 0.011730186048373894
          policy_loss: -0.05721948609695348
          total_loss: 0.10380382655172521
          vf_explained_var: 0.9274312853813171
          vf_loss: 0.15520009435872492
    num_agent_steps_sampled: 7017192
    num_agent_steps_trained: 7017192
    num_steps_sampled: 7017192
    num_steps_trained: 7017

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,702,110479,7017192,3.57627,14.1,-1.5,91.3091


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7027188
  custom_metrics: {}
  date: 2021-11-08_20-57-39
  done: false
  episode_len_mean: 92.12037037037037
  episode_media: {}
  episode_reward_max: 16.99999999999998
  episode_reward_mean: 4.326296296296306
  episode_reward_min: -1.0400000000000005
  episodes_this_iter: 108
  episodes_total: 76025
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.084609376772856
          entropy_coeff: 0.01
          kl: 0.01248936105037136
          policy_loss: -0.059409896476974346
          total_loss: 0.10871763759507583
          vf_explained_var: 0.9435846209526062
          vf_loss: 0.16052130156347894
    num_agent_steps_sampled: 7027188
    num_agent_steps_trained: 7027188
    num_steps_sampled: 7027188
    num_steps_trained: 7027

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,703,110623,7027188,4.3263,17,-1.04,92.1204




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7037184
  custom_metrics: {}
  date: 2021-11-08_21-00-27
  done: false
  episode_len_mean: 89.52252252252252
  episode_media: {}
  episode_reward_max: 13.960000000000024
  episode_reward_mean: 3.737657657657666
  episode_reward_min: -1.7300000000000006
  episodes_this_iter: 111
  episodes_total: 76136
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0552593627546587
          entropy_coeff: 0.01
          kl: 0.011948432950318536
          policy_loss: -0.05835780792065665
          total_loss: 0.10034071228458968
          vf_explained_var: 0.9320175647735596
          vf_loss: 0.15203108906777751
    num_agent_steps_sampled: 7037184
    num_agent_steps_trained: 7037184
    num_steps_sampled: 7037184
    num_steps_trained: 70

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,704,110790,7037184,3.73766,13.96,-1.73,89.5225




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7047180
  custom_metrics: {}
  date: 2021-11-08_21-03-05
  done: false
  episode_len_mean: 91.71296296296296
  episode_media: {}
  episode_reward_max: 14.540000000000019
  episode_reward_mean: 5.200555555555566
  episode_reward_min: -1.1700000000000004
  episodes_this_iter: 108
  episodes_total: 76244
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 1.9982247341392387
          entropy_coeff: 0.01
          kl: 0.013181030103139023
          policy_loss: -0.054400265807461025
          total_loss: 0.14707978654047874
          vf_explained_var: 0.9415457248687744
          vf_loss: 0.19143426433269284
    num_agent_steps_sampled: 7047180
    num_agent_steps_trained: 7047180
    num_steps_sampled: 7047180
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,705,110949,7047180,5.20056,14.54,-1.17,91.713




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7057176
  custom_metrics: {}
  date: 2021-11-08_21-05-51
  done: false
  episode_len_mean: 92.28440366972477
  episode_media: {}
  episode_reward_max: 12.95000000000001
  episode_reward_mean: 4.373669724770652
  episode_reward_min: -1.1100000000000003
  episodes_this_iter: 109
  episodes_total: 76353
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0352098662629086
          entropy_coeff: 0.01
          kl: 0.011131083090285218
          policy_loss: -0.060880114924576546
          total_loss: 0.0741304389312545
          vf_explained_var: 0.9449453353881836
          vf_loss: 0.13000465496244212
    num_agent_steps_sampled: 7057176
    num_agent_steps_trained: 7057176
    num_steps_sampled: 7057176
    num_steps_trained: 705

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,706,111115,7057176,4.37367,12.95,-1.11,92.2844




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7067172
  custom_metrics: {}
  date: 2021-11-08_21-08-57
  done: false
  episode_len_mean: 89.66964285714286
  episode_media: {}
  episode_reward_max: 14.020000000000023
  episode_reward_mean: 4.150089285714295
  episode_reward_min: -1.1800000000000004
  episodes_this_iter: 112
  episodes_total: 76465
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.031402108200595
          entropy_coeff: 0.01
          kl: 0.01209309395921347
          policy_loss: -0.05820568011452754
          total_loss: 0.10155162631064399
          vf_explained_var: 0.940680205821991
          vf_loss: 0.1525217487484726
    num_agent_steps_sampled: 7067172
    num_agent_steps_trained: 7067172
    num_steps_sampled: 7067172
    num_steps_trained: 706717

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,707,111300,7067172,4.15009,14.02,-1.18,89.6696




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7077168
  custom_metrics: {}
  date: 2021-11-08_21-11-31
  done: false
  episode_len_mean: 92.14814814814815
  episode_media: {}
  episode_reward_max: 16.679999999999975
  episode_reward_mean: 4.071944444444453
  episode_reward_min: -1.5900000000000007
  episodes_this_iter: 108
  episodes_total: 76573
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.051965972704765
          entropy_coeff: 0.01
          kl: 0.012714177925857831
          policy_loss: -0.061088491013098475
          total_loss: 0.10831534440955545
          vf_explained_var: 0.9242082238197327
          vf_loss: 0.1609590068586871
    num_agent_steps_sampled: 7077168
    num_agent_steps_trained: 7077168
    num_steps_sampled: 7077168
    num_steps_trained: 707

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,708,111454,7077168,4.07194,16.68,-1.59,92.1481




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7087164
  custom_metrics: {}
  date: 2021-11-08_21-14-44
  done: false
  episode_len_mean: 88.53982300884955
  episode_media: {}
  episode_reward_max: 10.970000000000013
  episode_reward_mean: 3.4805309734513346
  episode_reward_min: -1.1700000000000006
  episodes_this_iter: 113
  episodes_total: 76686
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0271156571869158
          entropy_coeff: 0.01
          kl: 0.012036429020681472
          policy_loss: -0.05578638035008031
          total_loss: 0.11135397509663787
          vf_explained_var: 0.9256315231323242
          vf_loss: 0.15999102163741477
    num_agent_steps_sampled: 7087164
    num_agent_steps_trained: 7087164
    num_steps_sampled: 7087164
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,709,111647,7087164,3.48053,10.97,-1.17,88.5398


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7097160
  custom_metrics: {}
  date: 2021-11-08_21-17-09
  done: false
  episode_len_mean: 93.37037037037037
  episode_media: {}
  episode_reward_max: 16.37999999999997
  episode_reward_mean: 4.271296296296305
  episode_reward_min: -1.6900000000000008
  episodes_this_iter: 108
  episodes_total: 76794
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0338250266181097
          entropy_coeff: 0.01
          kl: 0.012506406439491928
          policy_loss: -0.059086978347956114
          total_loss: 0.1051148583396123
          vf_explained_var: 0.9337288737297058
          vf_loss: 0.15604892875840012
    num_agent_steps_sampled: 7097160
    num_agent_steps_trained: 7097160
    num_steps_sampled: 7097160
    num_steps_trained: 709

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,710,111792,7097160,4.2713,16.38,-1.69,93.3704




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7107156
  custom_metrics: {}
  date: 2021-11-08_21-20-06
  done: false
  episode_len_mean: 90.91818181818182
  episode_media: {}
  episode_reward_max: 12.710000000000015
  episode_reward_mean: 3.9231818181818277
  episode_reward_min: -1.4400000000000006
  episodes_this_iter: 110
  episodes_total: 76904
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.049628181437142
          entropy_coeff: 0.01
          kl: 0.011684187020973813
          policy_loss: -0.05790018578115692
          total_loss: 0.09588791760655804
          vf_explained_var: 0.9416105151176453
          vf_loss: 0.14766634627062286
    num_agent_steps_sampled: 7107156
    num_agent_steps_trained: 7107156
    num_steps_sampled: 7107156
    num_steps_trained: 71

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,711,111970,7107156,3.92318,12.71,-1.44,90.9182




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7117152
  custom_metrics: {}
  date: 2021-11-08_21-23-00
  done: false
  episode_len_mean: 92.70093457943925
  episode_media: {}
  episode_reward_max: 12.760000000000014
  episode_reward_mean: 4.520747663551412
  episode_reward_min: -1.6000000000000008
  episodes_this_iter: 107
  episodes_total: 77011
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0513881938070315
          entropy_coeff: 0.01
          kl: 0.01220646800501192
          policy_loss: -0.06213361094267959
          total_loss: 0.09108185708427276
          vf_explained_var: 0.9378252029418945
          vf_loss: 0.1459214877162097
    num_agent_steps_sampled: 7117152
    num_agent_steps_trained: 7117152
    num_steps_sampled: 7117152
    num_steps_trained: 7117

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,712,112143,7117152,4.52075,12.76,-1.6,92.7009




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7127148
  custom_metrics: {}
  date: 2021-11-08_21-25-45
  done: false
  episode_len_mean: 92.46296296296296
  episode_media: {}
  episode_reward_max: 16.36999999999996
  episode_reward_mean: 4.015277777777787
  episode_reward_min: -1.6300000000000008
  episodes_this_iter: 108
  episodes_total: 77119
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0369058053717652
          entropy_coeff: 0.01
          kl: 0.012577838804094268
          policy_loss: -0.05938706929739724
          total_loss: 0.1164117922496974
          vf_explained_var: 0.9402981996536255
          vf_loss: 0.16751402965945822
    num_agent_steps_sampled: 7127148
    num_agent_steps_trained: 7127148
    num_steps_sampled: 7127148
    num_steps_trained: 7127

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,713,112308,7127148,4.01528,16.37,-1.63,92.463




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7137144
  custom_metrics: {}
  date: 2021-11-08_21-28-26
  done: false
  episode_len_mean: 91.38181818181818
  episode_media: {}
  episode_reward_max: 13.150000000000013
  episode_reward_mean: 3.9911818181818273
  episode_reward_min: -1.1400000000000003
  episodes_this_iter: 110
  episodes_total: 77229
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0200001526082683
          entropy_coeff: 0.01
          kl: 0.011477746263428243
          policy_loss: -0.0600814120286805
          total_loss: 0.10984115946210093
          vf_explained_var: 0.9360304474830627
          vf_loss: 0.16397483186302786
    num_agent_steps_sampled: 7137144
    num_agent_steps_trained: 7137144
    num_steps_sampled: 7137144
    num_steps_trained: 71

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,714,112469,7137144,3.99118,13.15,-1.14,91.3818




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7147140
  custom_metrics: {}
  date: 2021-11-08_21-31-12
  done: false
  episode_len_mean: 92.5
  episode_media: {}
  episode_reward_max: 10.910000000000013
  episode_reward_mean: 4.695740740740752
  episode_reward_min: -1.640000000000001
  episodes_this_iter: 108
  episodes_total: 77337
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.00985500486488
          entropy_coeff: 0.01
          kl: 0.012684829394120285
          policy_loss: -0.056229871327582844
          total_loss: 0.11162382393207751
          vf_explained_var: 0.9428830146789551
          vf_loss: 0.15905461947186889
    num_agent_steps_sampled: 7147140
    num_agent_steps_trained: 7147140
    num_steps_sampled: 7147140
    num_steps_trained: 7147140
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,715,112635,7147140,4.69574,10.91,-1.64,92.5




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7157136
  custom_metrics: {}
  date: 2021-11-08_21-33-45
  done: false
  episode_len_mean: 92.4074074074074
  episode_media: {}
  episode_reward_max: 11.760000000000025
  episode_reward_mean: 3.7915740740740835
  episode_reward_min: -1.5100000000000007
  episodes_this_iter: 108
  episodes_total: 77445
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.050736242176121
          entropy_coeff: 0.01
          kl: 0.011870733638322901
          policy_loss: -0.059748457606213216
          total_loss: 0.08425294217836653
          vf_explained_var: 0.9249081015586853
          vf_loss: 0.13746574516129545
    num_agent_steps_sampled: 7157136
    num_agent_steps_trained: 7157136
    num_steps_sampled: 7157136
    num_steps_trained: 71

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,716,112788,7157136,3.79157,11.76,-1.51,92.4074




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7167132
  custom_metrics: {}
  date: 2021-11-08_21-36-32
  done: false
  episode_len_mean: 91.44036697247707
  episode_media: {}
  episode_reward_max: 14.170000000000018
  episode_reward_mean: 4.160642201834872
  episode_reward_min: -1.1100000000000005
  episodes_this_iter: 109
  episodes_total: 77554
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.03119687891414
          entropy_coeff: 0.01
          kl: 0.011833524843271654
          policy_loss: -0.05559417099381487
          total_loss: 0.12742955517660604
          vf_explained_var: 0.925957977771759
          vf_loss: 0.17637744553100604
    num_agent_steps_sampled: 7167132
    num_agent_steps_trained: 7167132
    num_steps_sampled: 7167132
    num_steps_trained: 71671

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,717,112955,7167132,4.16064,14.17,-1.11,91.4404




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7177128
  custom_metrics: {}
  date: 2021-11-08_21-39-10
  done: false
  episode_len_mean: 93.86792452830188
  episode_media: {}
  episode_reward_max: 14.690000000000014
  episode_reward_mean: 4.745471698113217
  episode_reward_min: -1.1000000000000005
  episodes_this_iter: 106
  episodes_total: 77660
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0138528601736083
          entropy_coeff: 0.01
          kl: 0.013171301611823833
          policy_loss: -0.05400658314092419
          total_loss: 0.1368710628710687
          vf_explained_var: 0.9423031210899353
          vf_loss: 0.1810103039440309
    num_agent_steps_sampled: 7177128
    num_agent_steps_trained: 7177128
    num_steps_sampled: 7177128
    num_steps_trained: 7177

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,718,113113,7177128,4.74547,14.69,-1.1,93.8679




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7187124
  custom_metrics: {}
  date: 2021-11-08_21-41-42
  done: false
  episode_len_mean: 94.32075471698113
  episode_media: {}
  episode_reward_max: 14.640000000000015
  episode_reward_mean: 4.419150943396236
  episode_reward_min: -1.0600000000000005
  episodes_this_iter: 106
  episodes_total: 77766
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.031149444417057
          entropy_coeff: 0.01
          kl: 0.011316223714665152
          policy_loss: -0.06122471935824197
          total_loss: 0.09020615061347047
          vf_explained_var: 0.9501964449882507
          vf_loss: 0.14596259128620737
    num_agent_steps_sampled: 7187124
    num_agent_steps_trained: 7187124
    num_steps_sampled: 7187124
    num_steps_trained: 718

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,719,113265,7187124,4.41915,14.64,-1.06,94.3208


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7197120
  custom_metrics: {}
  date: 2021-11-08_21-43-59
  done: false
  episode_len_mean: 96.50961538461539
  episode_media: {}
  episode_reward_max: 16.339999999999964
  episode_reward_mean: 4.493173076923086
  episode_reward_min: -1.2900000000000005
  episodes_this_iter: 104
  episodes_total: 77870
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0242253014165112
          entropy_coeff: 0.01
          kl: 0.012548357961112854
          policy_loss: -0.057597664173724306
          total_loss: 0.1240085062960911
          vf_explained_var: 0.9386026263237
          vf_loss: 0.1732616967497728
    num_agent_steps_sampled: 7197120
    num_agent_steps_trained: 7197120
    num_steps_sampled: 7197120
    num_steps_trained: 719712

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,720,113402,7197120,4.49317,16.34,-1.29,96.5096




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7207116
  custom_metrics: {}
  date: 2021-11-08_21-47-12
  done: false
  episode_len_mean: 89.40178571428571
  episode_media: {}
  episode_reward_max: 13.020000000000014
  episode_reward_mean: 3.9904464285714374
  episode_reward_min: -1.7300000000000009
  episodes_this_iter: 112
  episodes_total: 77982
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 1.9971671561909536
          entropy_coeff: 0.01
          kl: 0.011856194484147233
          policy_loss: -0.055342659628034654
          total_loss: 0.10479840896864477
          vf_explained_var: 0.9407345652580261
          vf_loss: 0.153102846048836
    num_agent_steps_sampled: 7207116
    num_agent_steps_trained: 7207116
    num_steps_sampled: 7207116
    num_steps_trained: 72

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,721,113595,7207116,3.99045,13.02,-1.73,89.4018




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7217112
  custom_metrics: {}
  date: 2021-11-08_21-50-01
  done: false
  episode_len_mean: 92.74074074074075
  episode_media: {}
  episode_reward_max: 12.610000000000015
  episode_reward_mean: 3.65148148148149
  episode_reward_min: -1.4600000000000006
  episodes_this_iter: 108
  episodes_total: 78090
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0327119309678037
          entropy_coeff: 0.01
          kl: 0.01213062879591528
          policy_loss: -0.05530587607341954
          total_loss: 0.1251917515690319
          vf_explained_var: 0.9315354824066162
          vf_loss: 0.1731896580539198
    num_agent_steps_sampled: 7217112
    num_agent_steps_trained: 7217112
    num_steps_sampled: 7217112
    num_steps_trained: 721711

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,722,113764,7217112,3.65148,12.61,-1.46,92.7407




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7227108
  custom_metrics: {}
  date: 2021-11-08_21-52-35
  done: false
  episode_len_mean: 94.4
  episode_media: {}
  episode_reward_max: 14.750000000000014
  episode_reward_mean: 4.001523809523819
  episode_reward_min: -1.3300000000000005
  episodes_this_iter: 105
  episodes_total: 78195
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0102528418231214
          entropy_coeff: 0.01
          kl: 0.011688445012013262
          policy_loss: -0.055526922463288164
          total_loss: 0.10454560821621209
          vf_explained_var: 0.9291244149208069
          vf_loss: 0.1535473197332432
    num_agent_steps_sampled: 7227108
    num_agent_steps_trained: 7227108
    num_steps_sampled: 7227108
    num_steps_trained: 7227108
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,723,113918,7227108,4.00152,14.75,-1.33,94.4




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7237104
  custom_metrics: {}
  date: 2021-11-08_21-55-33
  done: false
  episode_len_mean: 91.16363636363636
  episode_media: {}
  episode_reward_max: 14.340000000000018
  episode_reward_mean: 3.9061818181818264
  episode_reward_min: -1.3200000000000005
  episodes_this_iter: 110
  episodes_total: 78305
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0156822448102836
          entropy_coeff: 0.01
          kl: 0.011820195424776003
          policy_loss: -0.05825386097352219
          total_loss: 0.10705375150482879
          vf_explained_var: 0.9327852725982666
          vf_loss: 0.15853655209494197
    num_agent_steps_sampled: 7237104
    num_agent_steps_trained: 7237104
    num_steps_sampled: 7237104
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,724,114096,7237104,3.90618,14.34,-1.32,91.1636




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7247100
  custom_metrics: {}
  date: 2021-11-08_21-58-06
  done: false
  episode_len_mean: 94.33962264150944
  episode_media: {}
  episode_reward_max: 14.140000000000022
  episode_reward_mean: 4.092547169811331
  episode_reward_min: -1.670000000000001
  episodes_this_iter: 106
  episodes_total: 78411
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0372923445497824
          entropy_coeff: 0.01
          kl: 0.012052569070812112
          policy_loss: -0.05853349119106419
          total_loss: 0.10877467658227453
          vf_explained_var: 0.9346842169761658
          vf_loss: 0.1602238317187398
    num_agent_steps_sampled: 7247100
    num_agent_steps_trained: 7247100
    num_steps_sampled: 7247100
    num_steps_trained: 7247

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,725,114248,7247100,4.09255,14.14,-1.67,94.3396




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7257096
  custom_metrics: {}
  date: 2021-11-08_22-00-46
  done: false
  episode_len_mean: 93.70093457943925
  episode_media: {}
  episode_reward_max: 15.849999999999943
  episode_reward_mean: 4.156822429906552
  episode_reward_min: -1.570000000000001
  episodes_this_iter: 107
  episodes_total: 78518
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0173763938439198
          entropy_coeff: 0.01
          kl: 0.011407888501915028
          policy_loss: -0.059657845883351615
          total_loss: 0.08257899337933741
          vf_explained_var: 0.9452478289604187
          vf_loss: 0.13642200656139697
    num_agent_steps_sampled: 7257096
    num_agent_steps_trained: 7257096
    num_steps_sampled: 7257096
    num_steps_trained: 72

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,726,114408,7257096,4.15682,15.85,-1.57,93.7009




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7267092
  custom_metrics: {}
  date: 2021-11-08_22-03-36
  done: false
  episode_len_mean: 91.78899082568807
  episode_media: {}
  episode_reward_max: 17.4
  episode_reward_mean: 3.7322935779816606
  episode_reward_min: -1.4800000000000006
  episodes_this_iter: 109
  episodes_total: 78627
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0510700677195164
          entropy_coeff: 0.01
          kl: 0.012262078399526982
          policy_loss: -0.057231969660163945
          total_loss: 0.09862981089032613
          vf_explained_var: 0.9325704574584961
          vf_loss: 0.14843793337543806
    num_agent_steps_sampled: 7267092
    num_agent_steps_trained: 7267092
    num_steps_sampled: 7267092
    num_steps_trained: 7267092
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,727,114579,7267092,3.73229,17.4,-1.48,91.789




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7277088
  custom_metrics: {}
  date: 2021-11-08_22-06-13
  done: false
  episode_len_mean: 92.36111111111111
  episode_media: {}
  episode_reward_max: 11.13000000000001
  episode_reward_mean: 4.201018518518527
  episode_reward_min: -1.8100000000000007
  episodes_this_iter: 108
  episodes_total: 78735
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.034145303363474
          entropy_coeff: 0.01
          kl: 0.011102612960196349
          policy_loss: -0.057101103346635644
          total_loss: 0.08113701401166937
          vf_explained_var: 0.9345253705978394
          vf_loss: 0.13328643060711204
    num_agent_steps_sampled: 7277088
    num_agent_steps_trained: 7277088
    num_steps_sampled: 7277088
    num_steps_trained: 727

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,728,114736,7277088,4.20102,11.13,-1.81,92.3611


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7287084
  custom_metrics: {}
  date: 2021-11-08_22-08-35
  done: false
  episode_len_mean: 96.21153846153847
  episode_media: {}
  episode_reward_max: 13.010000000000012
  episode_reward_mean: 3.8376923076923166
  episode_reward_min: -1.1400000000000008
  episodes_this_iter: 104
  episodes_total: 78839
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0582993152814035
          entropy_coeff: 0.01
          kl: 0.011891968078500233
          policy_loss: -0.05948771515335792
          total_loss: 0.08780743692659287
          vf_explained_var: 0.9333718419075012
          vf_loss: 0.1407867544163496
    num_agent_steps_sampled: 7287084
    num_agent_steps_trained: 7287084
    num_steps_sampled: 7287084
    num_steps_trained: 72

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,729,114877,7287084,3.83769,13.01,-1.14,96.2115




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7297080
  custom_metrics: {}
  date: 2021-11-08_22-11-25
  done: false
  episode_len_mean: 93.22429906542057
  episode_media: {}
  episode_reward_max: 12.830000000000018
  episode_reward_mean: 4.528411214953281
  episode_reward_min: -1.5900000000000005
  episodes_this_iter: 107
  episodes_total: 78946
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.019760281407935
          entropy_coeff: 0.01
          kl: 0.013021798589645245
          policy_loss: -0.05175860460815776
          total_loss: 0.12591833127264537
          vf_explained_var: 0.9327818155288696
          vf_loss: 0.16820925270549508
    num_agent_steps_sampled: 7297080
    num_agent_steps_trained: 7297080
    num_steps_sampled: 7297080
    num_steps_trained: 729

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,730,115048,7297080,4.52841,12.83,-1.59,93.2243




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7307076
  custom_metrics: {}
  date: 2021-11-08_22-14-14
  done: false
  episode_len_mean: 94.16037735849056
  episode_media: {}
  episode_reward_max: 18.199999999999996
  episode_reward_mean: 4.73500000000001
  episode_reward_min: -1.5600000000000007
  episodes_this_iter: 106
  episodes_total: 79052
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0533513199569833
          entropy_coeff: 0.01
          kl: 0.011897653017366679
          policy_loss: -0.05763450926726955
          total_loss: 0.1202524099785548
          vf_explained_var: 0.9344891309738159
          vf_loss: 0.17131609205220244
    num_agent_steps_sampled: 7307076
    num_agent_steps_trained: 7307076
    num_steps_sampled: 7307076
    num_steps_trained: 7307

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,731,115216,7307076,4.735,18.2,-1.56,94.1604




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7317072
  custom_metrics: {}
  date: 2021-11-08_22-16-47
  done: false
  episode_len_mean: 94.99056603773585
  episode_media: {}
  episode_reward_max: 11.980000000000022
  episode_reward_mean: 4.181037735849067
  episode_reward_min: -1.2600000000000005
  episodes_this_iter: 106
  episodes_total: 79158
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.045997083900321
          entropy_coeff: 0.01
          kl: 0.011494108122513788
          policy_loss: -0.06000605127654779
          total_loss: 0.09666544482366651
          vf_explained_var: 0.9431989192962646
          vf_loss: 0.15094645026211556
    num_agent_steps_sampled: 7317072
    num_agent_steps_trained: 7317072
    num_steps_sampled: 7317072
    num_steps_trained: 731

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,732,115369,7317072,4.18104,11.98,-1.26,94.9906




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7327068
  custom_metrics: {}
  date: 2021-11-08_22-19-34
  done: false
  episode_len_mean: 94.625
  episode_media: {}
  episode_reward_max: 12.860000000000019
  episode_reward_mean: 4.574134615384627
  episode_reward_min: -1.1100000000000003
  episodes_this_iter: 104
  episodes_total: 79262
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0277335254555076
          entropy_coeff: 0.01
          kl: 0.012174874751890852
          policy_loss: -0.05804275814443827
          total_loss: 0.0987998075552412
          vf_explained_var: 0.951367199420929
          vf_loss: 0.14938401209079047
    num_agent_steps_sampled: 7327068
    num_agent_steps_trained: 7327068
    num_steps_sampled: 7327068
    num_steps_trained: 7327068
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,733,115537,7327068,4.57413,12.86,-1.11,94.625




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7337064
  custom_metrics: {}
  date: 2021-11-08_22-22-46
  done: false
  episode_len_mean: 93.76851851851852
  episode_media: {}
  episode_reward_max: 14.23000000000002
  episode_reward_mean: 3.731759259259268
  episode_reward_min: -1.930000000000001
  episodes_this_iter: 108
  episodes_total: 79370
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.038976996882349
          entropy_coeff: 0.01
          kl: 0.011828768757171107
          policy_loss: -0.05534471527824544
          total_loss: 0.11038403545673459
          vf_explained_var: 0.9308470487594604
          vf_loss: 0.15917110562356365
    num_agent_steps_sampled: 7337064
    num_agent_steps_trained: 7337064
    num_steps_sampled: 7337064
    num_steps_trained: 73370

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,734,115728,7337064,3.73176,14.23,-1.93,93.7685




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7347060
  custom_metrics: {}
  date: 2021-11-08_22-25-41
  done: false
  episode_len_mean: 93.55660377358491
  episode_media: {}
  episode_reward_max: 16.41999999999996
  episode_reward_mean: 4.477924528301896
  episode_reward_min: -1.3000000000000005
  episodes_this_iter: 106
  episodes_total: 79476
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0674331850475736
          entropy_coeff: 0.01
          kl: 0.013028044283806006
          policy_loss: -0.055405556843576266
          total_loss: 0.12759616798283452
          vf_explained_var: 0.9290504455566406
          vf_loss: 0.17399654226918887
    num_agent_steps_sampled: 7347060
    num_agent_steps_trained: 7347060
    num_steps_sampled: 7347060
    num_steps_trained: 73

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,735,115904,7347060,4.47792,16.42,-1.3,93.5566




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7357056
  custom_metrics: {}
  date: 2021-11-08_22-28-27
  done: false
  episode_len_mean: 91.88073394495413
  episode_media: {}
  episode_reward_max: 14.580000000000016
  episode_reward_mean: 3.651192660550467
  episode_reward_min: -1.6800000000000006
  episodes_this_iter: 109
  episodes_total: 79585
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0341403082904654
          entropy_coeff: 0.01
          kl: 0.010788334843446063
          policy_loss: -0.053628795203935896
          total_loss: 0.09037510212510824
          vf_explained_var: 0.9271177649497986
          vf_loss: 0.1397681246105677
    num_agent_steps_sampled: 7357056
    num_agent_steps_trained: 7357056
    num_steps_sampled: 7357056
    num_steps_trained: 73

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,736,116069,7357056,3.65119,14.58,-1.68,91.8807




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7367052
  custom_metrics: {}
  date: 2021-11-08_22-31-06
  done: false
  episode_len_mean: 93.59813084112149
  episode_media: {}
  episode_reward_max: 16.45000000000001
  episode_reward_mean: 4.292897196261692
  episode_reward_min: -1.4000000000000006
  episodes_this_iter: 107
  episodes_total: 79692
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0417228762920088
          entropy_coeff: 0.01
          kl: 0.012750595400430855
          policy_loss: -0.05653066739447924
          total_loss: 0.10545919193034498
          vf_explained_var: 0.9372024536132812
          vf_loss: 0.1533596376227772
    num_agent_steps_sampled: 7367052
    num_agent_steps_trained: 7367052
    num_steps_sampled: 7367052
    num_steps_trained: 7367

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,737,116228,7367052,4.2929,16.45,-1.4,93.5981




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7377048
  custom_metrics: {}
  date: 2021-11-08_22-33-59
  done: false
  episode_len_mean: 91.26605504587155
  episode_media: {}
  episode_reward_max: 14.660000000000014
  episode_reward_mean: 4.459633027522946
  episode_reward_min: -1.2000000000000004
  episodes_this_iter: 109
  episodes_total: 79801
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0088254397750918
          entropy_coeff: 0.01
          kl: 0.012482023730401177
          policy_loss: -0.054839923076777375
          total_loss: 0.1400323829239505
          vf_explained_var: 0.9355411529541016
          vf_loss: 0.1865249494329477
    num_agent_steps_sampled: 7377048
    num_agent_steps_trained: 7377048
    num_steps_sampled: 7377048
    num_steps_trained: 737

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,738,116401,7377048,4.45963,14.66,-1.2,91.2661


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7387044
  custom_metrics: {}
  date: 2021-11-08_22-36-28
  done: false
  episode_len_mean: 92.5137614678899
  episode_media: {}
  episode_reward_max: 12.660000000000018
  episode_reward_mean: 3.8777981651376234
  episode_reward_min: -1.580000000000001
  episodes_this_iter: 109
  episodes_total: 79910
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0362442470004414
          entropy_coeff: 0.01
          kl: 0.01150486395574197
          policy_loss: -0.056710921477876666
          total_loss: 0.10201754594842592
          vf_explained_var: 0.9392250776290894
          vf_loss: 0.1528813900043949
    num_agent_steps_sampled: 7387044
    num_agent_steps_trained: 7387044
    num_steps_sampled: 7387044
    num_steps_trained: 7387

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,739,116551,7387044,3.8778,12.66,-1.58,92.5138




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7397040
  custom_metrics: {}
  date: 2021-11-08_22-39-37
  done: false
  episode_len_mean: 91.94444444444444
  episode_media: {}
  episode_reward_max: 14.690000000000015
  episode_reward_mean: 4.4421296296296395
  episode_reward_min: -1.4600000000000006
  episodes_this_iter: 108
  episodes_total: 80018
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0463558953032535
          entropy_coeff: 0.01
          kl: 0.011305528871457433
          policy_loss: -0.0561179808826528
          total_loss: 0.10134090690706403
          vf_explained_var: 0.9467591643333435
          vf_loss: 0.15216703792540437
    num_agent_steps_sampled: 7397040
    num_agent_steps_trained: 7397040
    num_steps_sampled: 7397040
    num_steps_trained: 73

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,740,116739,7397040,4.44213,14.69,-1.46,91.9444




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7407036
  custom_metrics: {}
  date: 2021-11-08_22-42-30
  done: false
  episode_len_mean: 91.95454545454545
  episode_media: {}
  episode_reward_max: 18.24999999999994
  episode_reward_mean: 4.506818181818191
  episode_reward_min: -1.6000000000000008
  episodes_this_iter: 110
  episodes_total: 80128
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.047503886671148
          entropy_coeff: 0.01
          kl: 0.011843694274140502
          policy_loss: -0.05820201242931633
          total_loss: 0.12441963337234452
          vf_explained_var: 0.9320133328437805
          vf_loss: 0.17611526762111446
    num_agent_steps_sampled: 7407036
    num_agent_steps_trained: 7407036
    num_steps_sampled: 7407036
    num_steps_trained: 7407

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,741,116912,7407036,4.50682,18.25,-1.6,91.9545




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7417032
  custom_metrics: {}
  date: 2021-11-08_22-45-16
  done: false
  episode_len_mean: 89.27927927927928
  episode_media: {}
  episode_reward_max: 12.99000000000001
  episode_reward_mean: 3.8918918918919
  episode_reward_min: -1.4200000000000006
  episodes_this_iter: 111
  episodes_total: 80239
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.031227233369126
          entropy_coeff: 0.01
          kl: 0.011895708496474273
          policy_loss: -0.055736197090238075
          total_loss: 0.1284681188635146
          vf_explained_var: 0.9300735592842102
          vf_loss: 0.1774166767222759
    num_agent_steps_sampled: 7417032
    num_agent_steps_trained: 7417032
    num_steps_sampled: 7417032
    num_steps_trained: 7417032

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,742,117078,7417032,3.89189,12.99,-1.42,89.2793


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7427028
  custom_metrics: {}
  date: 2021-11-08_22-47-40
  done: false
  episode_len_mean: 94.16981132075472
  episode_media: {}
  episode_reward_max: 16.62999999999998
  episode_reward_mean: 4.982169811320766
  episode_reward_min: -2.1100000000000003
  episodes_this_iter: 106
  episodes_total: 80345
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0397506693489533
          entropy_coeff: 0.01
          kl: 0.012613751222182093
          policy_loss: -0.05883433396935973
          total_loss: 0.11681097865614117
          vf_explained_var: 0.9466959834098816
          vf_loss: 0.16730711615015553
    num_agent_steps_sampled: 7427028
    num_agent_steps_trained: 7427028
    num_steps_sampled: 7427028
    num_steps_trained: 742

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,743,117222,7427028,4.98217,16.63,-2.11,94.1698




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7437024
  custom_metrics: {}
  date: 2021-11-08_22-50-21
  done: false
  episode_len_mean: 93.36111111111111
  episode_media: {}
  episode_reward_max: 16.69000000000001
  episode_reward_mean: 4.663981481481493
  episode_reward_min: -1.6800000000000008
  episodes_this_iter: 108
  episodes_total: 80453
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0201648286265184
          entropy_coeff: 0.01
          kl: 0.012508680471655277
          policy_loss: -0.056477974395020905
          total_loss: 0.1225447240500496
          vf_explained_var: 0.9378181099891663
          vf_loss: 0.17072800820390893
    num_agent_steps_sampled: 7437024
    num_agent_steps_trained: 7437024
    num_steps_sampled: 7437024
    num_steps_trained: 743

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,744,117383,7437024,4.66398,16.69,-1.68,93.3611




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7447020
  custom_metrics: {}
  date: 2021-11-08_22-53-16
  done: false
  episode_len_mean: 89.09821428571429
  episode_media: {}
  episode_reward_max: 12.440000000000019
  episode_reward_mean: 4.483035714285725
  episode_reward_min: -1.4300000000000004
  episodes_this_iter: 112
  episodes_total: 80565
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0365759150594727
          entropy_coeff: 0.01
          kl: 0.011946285929324662
          policy_loss: -0.05552857765911991
          total_loss: 0.13537399010000448
          vf_explained_var: 0.9379496574401855
          vf_loss: 0.18405319263155645
    num_agent_steps_sampled: 7447020
    num_agent_steps_trained: 7447020
    num_steps_sampled: 7447020
    num_steps_trained: 74

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,745,117558,7447020,4.48304,12.44,-1.43,89.0982




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7457016
  custom_metrics: {}
  date: 2021-11-08_22-55-53
  done: false
  episode_len_mean: 93.07476635514018
  episode_media: {}
  episode_reward_max: 14.850000000000016
  episode_reward_mean: 5.16196261682244
  episode_reward_min: -1.4100000000000006
  episodes_this_iter: 107
  episodes_total: 80672
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.008964231482938
          entropy_coeff: 0.01
          kl: 0.01307501335594129
          policy_loss: -0.054708013995590374
          total_loss: 0.15617696300156925
          vf_explained_var: 0.9474495649337769
          vf_loss: 0.20118810532248427
    num_agent_steps_sampled: 7457016
    num_agent_steps_trained: 7457016
    num_steps_sampled: 7457016
    num_steps_trained: 7457

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,746,117715,7457016,5.16196,14.85,-1.41,93.0748




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7467012
  custom_metrics: {}
  date: 2021-11-08_22-58-39
  done: false
  episode_len_mean: 92.26851851851852
  episode_media: {}
  episode_reward_max: 13.02000000000001
  episode_reward_mean: 4.303703703703713
  episode_reward_min: -1.910000000000001
  episodes_this_iter: 108
  episodes_total: 80780
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0558470992960483
          entropy_coeff: 0.01
          kl: 0.011127347955278756
          policy_loss: -0.05982140930385416
          total_loss: 0.10165759332987488
          vf_explained_var: 0.9438791871070862
          vf_loss: 0.1566879836221536
    num_agent_steps_sampled: 7467012
    num_agent_steps_trained: 7467012
    num_steps_sampled: 7467012
    num_steps_trained: 74670

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,747,117881,7467012,4.3037,13.02,-1.91,92.2685




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7477008
  custom_metrics: {}
  date: 2021-11-08_23-01-31
  done: false
  episode_len_mean: 91.24770642201835
  episode_media: {}
  episode_reward_max: 17.139999999999997
  episode_reward_mean: 4.433669724770652
  episode_reward_min: -0.03
  episodes_this_iter: 109
  episodes_total: 80889
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.026423074037601
          entropy_coeff: 0.01
          kl: 0.013124004855926567
          policy_loss: -0.053203143313145025
          total_loss: 0.16760867146186084
          vf_explained_var: 0.928193986415863
          vf_loss: 0.21117792199564794
    num_agent_steps_sampled: 7477008
    num_agent_steps_trained: 7477008
    num_steps_sampled: 7477008
    num_steps_trained: 7477008
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,748,118053,7477008,4.43367,17.14,-0.03,91.2477




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7487004
  custom_metrics: {}
  date: 2021-11-08_23-04-06
  done: false
  episode_len_mean: 92.3425925925926
  episode_media: {}
  episode_reward_max: 18.249999999999947
  episode_reward_mean: 4.293148148148157
  episode_reward_min: -1.2100000000000009
  episodes_this_iter: 108
  episodes_total: 80997
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.04289796556163
          entropy_coeff: 0.01
          kl: 0.01217280110684102
          policy_loss: -0.056390302427686174
          total_loss: 0.12238854250003002
          vf_explained_var: 0.9357507228851318
          vf_loss: 0.17147666105411502
    num_agent_steps_sampled: 7487004
    num_agent_steps_trained: 7487004
    num_steps_sampled: 7487004
    num_steps_trained: 74870

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,749,118208,7487004,4.29315,18.25,-1.21,92.3426


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7497000
  custom_metrics: {}
  date: 2021-11-08_23-06-34
  done: false
  episode_len_mean: 92.53211009174312
  episode_media: {}
  episode_reward_max: 12.940000000000008
  episode_reward_mean: 4.645596330275239
  episode_reward_min: -1.4199999999999997
  episodes_this_iter: 109
  episodes_total: 81106
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0456599166250635
          entropy_coeff: 0.01
          kl: 0.012340468493478028
          policy_loss: -0.056597922070540936
          total_loss: 0.12374270977293197
          vf_explained_var: 0.9430824518203735
          vf_loss: 0.17268410038489562
    num_agent_steps_sampled: 7497000
    num_agent_steps_trained: 7497000
    num_steps_sampled: 7497000
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,750,118356,7497000,4.6456,12.94,-1.42,92.5321




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7506996
  custom_metrics: {}
  date: 2021-11-08_23-09-38
  done: false
  episode_len_mean: 90.69090909090909
  episode_media: {}
  episode_reward_max: 12.250000000000016
  episode_reward_mean: 3.8102727272727357
  episode_reward_min: -1.6800000000000006
  episodes_this_iter: 110
  episodes_total: 81216
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0604707327663387
          entropy_coeff: 0.01
          kl: 0.011300725914708163
          policy_loss: -0.05926519298615555
          total_loss: 0.08230763078054301
          vf_explained_var: 0.9466552734375
          vf_loss: 0.13643306438675804
    num_agent_steps_sampled: 7506996
    num_agent_steps_trained: 7506996
    num_steps_sampled: 7506996
    num_steps_trained: 7506

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,751,118540,7506996,3.81027,12.25,-1.68,90.6909




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7516992
  custom_metrics: {}
  date: 2021-11-08_23-12-49
  done: false
  episode_len_mean: 88.14912280701755
  episode_media: {}
  episode_reward_max: 14.400000000000016
  episode_reward_mean: 3.8841228070175524
  episode_reward_min: -1.2900000000000005
  episodes_this_iter: 114
  episodes_total: 81330
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.020374859907688
          entropy_coeff: 0.01
          kl: 0.011301942456527162
          policy_loss: -0.052696043763182356
          total_loss: 0.13813106537693076
          vf_explained_var: 0.9267853498458862
          vf_loss: 0.185283619329397
    num_agent_steps_sampled: 7516992
    num_agent_steps_trained: 7516992
    num_steps_sampled: 7516992
    num_steps_trained: 751

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,752,118730,7516992,3.88412,14.4,-1.29,88.1491




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7526988
  custom_metrics: {}
  date: 2021-11-08_23-15-39
  done: false
  episode_len_mean: 91.11009174311927
  episode_media: {}
  episode_reward_max: 16.730000000000015
  episode_reward_mean: 4.385412844036708
  episode_reward_min: -1.5200000000000007
  episodes_this_iter: 109
  episodes_total: 81439
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0313136697834375
          entropy_coeff: 0.01
          kl: 0.011728952882483283
          policy_loss: -0.05573898928526502
          total_loss: 0.11179539007731737
          vf_explained_var: 0.930083155632019
          vf_loss: 0.16112749386722078
    num_agent_steps_sampled: 7526988
    num_agent_steps_trained: 7526988
    num_steps_sampled: 7526988
    num_steps_trained: 752

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,753,118900,7526988,4.38541,16.73,-1.52,91.1101


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7536984
  custom_metrics: {}
  date: 2021-11-08_23-18-05
  done: false
  episode_len_mean: 90.009009009009
  episode_media: {}
  episode_reward_max: 10.500000000000016
  episode_reward_mean: 3.579369369369378
  episode_reward_min: -1.5800000000000005
  episodes_this_iter: 111
  episodes_total: 81550
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0709400904484285
          entropy_coeff: 0.01
          kl: 0.011151175346368924
          policy_loss: -0.06263814110340726
          total_loss: 0.06575608470946805
          vf_explained_var: 0.9507629871368408
          vf_loss: 0.1236998541933349
    num_agent_steps_sampled: 7536984
    num_agent_steps_trained: 7536984
    num_steps_sampled: 7536984
    num_steps_trained: 75369

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,754,119047,7536984,3.57937,10.5,-1.58,90.009




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7546980
  custom_metrics: {}
  date: 2021-11-08_23-20-41
  done: false
  episode_len_mean: 92.08256880733946
  episode_media: {}
  episode_reward_max: 12.600000000000016
  episode_reward_mean: 4.058256880733954
  episode_reward_min: -1.6000000000000005
  episodes_this_iter: 109
  episodes_total: 81659
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.035083876308213
          entropy_coeff: 0.01
          kl: 0.013065786753952629
          policy_loss: -0.05859276873624732
          total_loss: 0.12180529516588291
          vf_explained_var: 0.9271795153617859
          vf_loss: 0.17098340563253206
    num_agent_steps_sampled: 7546980
    num_agent_steps_trained: 7546980
    num_steps_sampled: 7546980
    num_steps_trained: 754

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,755,119203,7546980,4.05826,12.6,-1.6,92.0826




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7556976
  custom_metrics: {}
  date: 2021-11-08_23-23-20
  done: false
  episode_len_mean: 89.96396396396396
  episode_media: {}
  episode_reward_max: 12.910000000000014
  episode_reward_mean: 4.127657657657666
  episode_reward_min: -2.079999999999999
  episodes_this_iter: 111
  episodes_total: 81770
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.045500679810842
          entropy_coeff: 0.01
          kl: 0.012090247658048704
          policy_loss: -0.06135746375617818
          total_loss: 0.0949428926468787
          vf_explained_var: 0.9349818229675293
          vf_loss: 0.14921226647929248
    num_agent_steps_sampled: 7556976
    num_agent_steps_trained: 7556976
    num_steps_sampled: 7556976
    num_steps_trained: 75569

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,756,119362,7556976,4.12766,12.91,-2.08,89.964




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7566972
  custom_metrics: {}
  date: 2021-11-08_23-26-17
  done: false
  episode_len_mean: 90.67272727272727
  episode_media: {}
  episode_reward_max: 12.700000000000014
  episode_reward_mean: 4.334545454545464
  episode_reward_min: -1.430000000000001
  episodes_this_iter: 110
  episodes_total: 81880
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.040098328773792
          entropy_coeff: 0.01
          kl: 0.011461925170541017
          policy_loss: -0.05523485157511428
          total_loss: 0.11386992387935264
          vf_explained_var: 0.9390733242034912
          vf_loss: 0.16339406022467676
    num_agent_steps_sampled: 7566972
    num_agent_steps_trained: 7566972
    num_steps_sampled: 7566972
    num_steps_trained: 7566

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,757,119538,7566972,4.33455,12.7,-1.43,90.6727




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7576968
  custom_metrics: {}
  date: 2021-11-08_23-29-18
  done: false
  episode_len_mean: 89.49107142857143
  episode_media: {}
  episode_reward_max: 18.56999999999997
  episode_reward_mean: 4.528839285714294
  episode_reward_min: -1.4000000000000006
  episodes_this_iter: 112
  episodes_total: 81992
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.040688460097354
          entropy_coeff: 0.01
          kl: 0.011642223061264492
          policy_loss: -0.05739103196363928
          total_loss: 0.08443641140659014
          vf_explained_var: 0.955069899559021
          vf_loss: 0.13571188827721864
    num_agent_steps_sampled: 7576968
    num_agent_steps_trained: 7576968
    num_steps_sampled: 7576968
    num_steps_trained: 75769

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,758,119720,7576968,4.52884,18.57,-1.4,89.4911


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7586964
  custom_metrics: {}
  date: 2021-11-08_23-31-44
  done: false
  episode_len_mean: 91.33636363636364
  episode_media: {}
  episode_reward_max: 16.909999999999993
  episode_reward_mean: 4.16681818181819
  episode_reward_min: -1.3300000000000003
  episodes_this_iter: 110
  episodes_total: 82102
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.073180796040429
          entropy_coeff: 0.01
          kl: 0.011427856293039447
          policy_loss: -0.05723865276091119
          total_loss: 0.12286864164739084
          vf_explained_var: 0.9270882606506348
          vf_loss: 0.17480501692710268
    num_agent_steps_sampled: 7586964
    num_agent_steps_trained: 7586964
    num_steps_sampled: 7586964
    num_steps_trained: 7586

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,759,119865,7586964,4.16682,16.91,-1.33,91.3364




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7596960
  custom_metrics: {}
  date: 2021-11-08_23-34-28
  done: false
  episode_len_mean: 87.96491228070175
  episode_media: {}
  episode_reward_max: 12.630000000000013
  episode_reward_mean: 4.523508771929834
  episode_reward_min: -0.9300000000000006
  episodes_this_iter: 114
  episodes_total: 82216
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0198308352731233
          entropy_coeff: 0.01
          kl: 0.011099223064883399
          policy_loss: -0.05756158030185944
          total_loss: 0.08903134947276523
          vf_explained_var: 0.9496151208877563
          vf_loss: 0.1415058195988974
    num_agent_steps_sampled: 7596960
    num_agent_steps_trained: 7596960
    num_steps_sampled: 7596960
    num_steps_trained: 759

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,760,120030,7596960,4.52351,12.63,-0.93,87.9649




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7606956
  custom_metrics: {}
  date: 2021-11-08_23-37-18
  done: false
  episode_len_mean: 87.41592920353982
  episode_media: {}
  episode_reward_max: 16.309999999999956
  episode_reward_mean: 4.420707964601779
  episode_reward_min: -1.1300000000000003
  episodes_this_iter: 113
  episodes_total: 82329
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0604005075927474
          entropy_coeff: 0.01
          kl: 0.011725548605101317
          policy_loss: -0.059774076586796177
          total_loss: 0.09557299681692424
          vf_explained_var: 0.949367105960846
          vf_loss: 0.1492388119905168
    num_agent_steps_sampled: 7606956
    num_agent_steps_trained: 7606956
    num_steps_sampled: 7606956
    num_steps_trained: 760

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,761,120199,7606956,4.42071,16.31,-1.13,87.4159


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7616952
  custom_metrics: {}
  date: 2021-11-08_23-39-43
  done: false
  episode_len_mean: 91.57272727272728
  episode_media: {}
  episode_reward_max: 16.550000000000008
  episode_reward_mean: 4.574818181818193
  episode_reward_min: -1.7100000000000006
  episodes_this_iter: 110
  episodes_total: 82439
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.026859654626276
          entropy_coeff: 0.01
          kl: 0.013004867296080047
          policy_loss: -0.05604085099572937
          total_loss: 0.11994348580383847
          vf_explained_var: 0.9487645030021667
          vf_loss: 0.16662621974913228
    num_agent_steps_sampled: 7616952
    num_agent_steps_trained: 7616952
    num_steps_sampled: 7616952
    num_steps_trained: 761

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,762,120344,7616952,4.57482,16.55,-1.71,91.5727




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7626948
  custom_metrics: {}
  date: 2021-11-08_23-42-27
  done: false
  episode_len_mean: 89.16964285714286
  episode_media: {}
  episode_reward_max: 10.900000000000011
  episode_reward_mean: 4.509910714285724
  episode_reward_min: -1.770000000000001
  episodes_this_iter: 112
  episodes_total: 82551
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0425289979347814
          entropy_coeff: 0.01
          kl: 0.011915577140298832
          policy_loss: -0.05886439807617511
          total_loss: 0.11252759392333464
          vf_explained_var: 0.9483529925346375
          vf_loss: 0.16467210818878097
    num_agent_steps_sampled: 7626948
    num_agent_steps_trained: 7626948
    num_steps_sampled: 7626948
    num_steps_trained: 762

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,763,120508,7626948,4.50991,10.9,-1.77,89.1696




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7636944
  custom_metrics: {}
  date: 2021-11-08_23-45-23
  done: false
  episode_len_mean: 87.75438596491227
  episode_media: {}
  episode_reward_max: 14.950000000000014
  episode_reward_mean: 4.361228070175447
  episode_reward_min: -1.4400000000000006
  episodes_this_iter: 114
  episodes_total: 82665
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.034271281193464
          entropy_coeff: 0.01
          kl: 0.011699858620248358
          policy_loss: -0.05641521142843442
          total_loss: 0.133411982585477
          vf_explained_var: 0.9324982762336731
          vf_loss: 0.18351616378889507
    num_agent_steps_sampled: 7636944
    num_agent_steps_trained: 7636944
    num_steps_sampled: 7636944
    num_steps_trained: 76369

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,764,120684,7636944,4.36123,14.95,-1.44,87.7544




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7646940
  custom_metrics: {}
  date: 2021-11-08_23-48-03
  done: false
  episode_len_mean: 89.84821428571429
  episode_media: {}
  episode_reward_max: 14.430000000000014
  episode_reward_mean: 4.190089285714295
  episode_reward_min: -1.2700000000000007
  episodes_this_iter: 112
  episodes_total: 82777
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.051578382345346
          entropy_coeff: 0.01
          kl: 0.011827423219124212
          policy_loss: -0.05893815211378611
          total_loss: 0.09426331035315226
          vf_explained_var: 0.9421722888946533
          vf_loss: 0.1467728962755611
    num_agent_steps_sampled: 7646940
    num_agent_steps_trained: 7646940
    num_steps_sampled: 7646940
    num_steps_trained: 7646

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,765,120844,7646940,4.19009,14.43,-1.27,89.8482


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7656936
  custom_metrics: {}
  date: 2021-11-08_23-50-32
  done: false
  episode_len_mean: 88.74774774774775
  episode_media: {}
  episode_reward_max: 14.99000000000001
  episode_reward_mean: 4.039639639639649
  episode_reward_min: -1.3000000000000005
  episodes_this_iter: 111
  episodes_total: 82888
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.050622863851042
          entropy_coeff: 0.01
          kl: 0.011938934806219303
          policy_loss: -0.060762633443770246
          total_loss: 0.10877918109584313
          vf_explained_var: 0.929267168045044
          vf_loss: 0.16284965827870063
    num_agent_steps_sampled: 7656936
    num_agent_steps_trained: 7656936
    num_steps_sampled: 7656936
    num_steps_trained: 7656

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,766,120993,7656936,4.03964,14.99,-1.3,88.7477




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7666932
  custom_metrics: {}
  date: 2021-11-08_23-53-27
  done: false
  episode_len_mean: 88.59292035398231
  episode_media: {}
  episode_reward_max: 16.44999999999997
  episode_reward_mean: 4.552566371681426
  episode_reward_min: -1.7500000000000009
  episodes_this_iter: 113
  episodes_total: 83001
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.026527641981076
          entropy_coeff: 0.01
          kl: 0.012145418769737997
          policy_loss: -0.05790300136193251
          total_loss: 0.09950117021480687
          vf_explained_var: 0.9493969678878784
          vf_loss: 0.15000066530500722
    num_agent_steps_sampled: 7666932
    num_agent_steps_trained: 7666932
    num_steps_sampled: 7666932
    num_steps_trained: 7666

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,767,121168,7666932,4.55257,16.45,-1.75,88.5929




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7676928
  custom_metrics: {}
  date: 2021-11-08_23-56-21
  done: false
  episode_len_mean: 87.7719298245614
  episode_media: {}
  episode_reward_max: 16.71999999999998
  episode_reward_mean: 4.887719298245624
  episode_reward_min: -1.2200000000000004
  episodes_this_iter: 114
  episodes_total: 83115
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.001530882639763
          entropy_coeff: 0.01
          kl: 0.01332343569398289
          policy_loss: -0.05930844610158959
          total_loss: 0.13480095262519823
          vf_explained_var: 0.944628894329071
          vf_loss: 0.18377225431812624
    num_agent_steps_sampled: 7676928
    num_agent_steps_trained: 7676928
    num_steps_sampled: 7676928
    num_steps_trained: 7676928

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,768,121342,7676928,4.88772,16.72,-1.22,87.7719




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7686924
  custom_metrics: {}
  date: 2021-11-08_23-59-03
  done: false
  episode_len_mean: 87.53508771929825
  episode_media: {}
  episode_reward_max: 12.810000000000013
  episode_reward_mean: 4.541666666666676
  episode_reward_min: -1.7200000000000006
  episodes_this_iter: 114
  episodes_total: 83229
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0017540851209916
          entropy_coeff: 0.01
          kl: 0.011436992294581103
          policy_loss: -0.05503516797071848
          total_loss: 0.1200203696377257
          vf_explained_var: 0.9400305151939392
          vf_loss: 0.169018180302989
    num_agent_steps_sampled: 7686924
    num_agent_steps_trained: 7686924
    num_steps_sampled: 7686924
    num_steps_trained: 76869

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,769,121504,7686924,4.54167,12.81,-1.72,87.5351




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7696920
  custom_metrics: {}
  date: 2021-11-09_00-02-00
  done: false
  episode_len_mean: 87.55652173913043
  episode_media: {}
  episode_reward_max: 18.67999999999994
  episode_reward_mean: 4.296869565217398
  episode_reward_min: -1.4300000000000006
  episodes_this_iter: 115
  episodes_total: 83344
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.024310477065225
          entropy_coeff: 0.01
          kl: 0.012096609818812604
          policy_loss: -0.054438974564242314
          total_loss: 0.14037244936140875
          vf_explained_var: 0.9392223358154297
          vf_loss: 0.18749693896716985
    num_agent_steps_sampled: 7696920
    num_agent_steps_trained: 7696920
    num_steps_sampled: 7696920
    num_steps_trained: 769

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,770,121681,7696920,4.29687,18.68,-1.43,87.5565




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7706916
  custom_metrics: {}
  date: 2021-11-09_00-04-56
  done: false
  episode_len_mean: 87.11504424778761
  episode_media: {}
  episode_reward_max: 14.620000000000013
  episode_reward_mean: 4.256902654867266
  episode_reward_min: -1.620000000000001
  episodes_this_iter: 113
  episodes_total: 83457
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.02380239637489
          entropy_coeff: 0.01
          kl: 0.011000707536775338
          policy_loss: -0.05880421751584762
          total_loss: 0.07653692279361252
          vf_explained_var: 0.9448068141937256
          vf_loss: 0.1305181773331685
    num_agent_steps_sampled: 7706916
    num_agent_steps_trained: 7706916
    num_steps_sampled: 7706916
    num_steps_trained: 770691

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,771,121856,7706916,4.2569,14.62,-1.62,87.115




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7716912
  custom_metrics: {}
  date: 2021-11-09_00-07-42
  done: false
  episode_len_mean: 87.65217391304348
  episode_media: {}
  episode_reward_max: 16.87000000000001
  episode_reward_mean: 4.367217391304356
  episode_reward_min: -1.3800000000000006
  episodes_this_iter: 115
  episodes_total: 83572
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 1.9867036377262866
          entropy_coeff: 0.01
          kl: 0.012389058822080148
          policy_loss: -0.053222003919828655
          total_loss: 0.1379171248907462
          vf_explained_var: 0.9333925843238831
          vf_loss: 0.18278234087599393
    num_agent_steps_sampled: 7716912
    num_agent_steps_trained: 7716912
    num_steps_sampled: 7716912
    num_steps_trained: 771

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,772,122023,7716912,4.36722,16.87,-1.38,87.6522




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7726908
  custom_metrics: {}
  date: 2021-11-09_00-10-32
  done: false
  episode_len_mean: 90.30630630630631
  episode_media: {}
  episode_reward_max: 10.690000000000012
  episode_reward_mean: 3.9976576576576655
  episode_reward_min: -1.3600000000000003
  episodes_this_iter: 111
  episodes_total: 83683
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.004740552107493
          entropy_coeff: 0.01
          kl: 0.012345700359021642
          policy_loss: -0.05316506771323008
          total_loss: 0.11339615566226152
          vf_explained_var: 0.9357135891914368
          vf_loss: 0.15848357965692114
    num_agent_steps_sampled: 7726908
    num_agent_steps_trained: 7726908
    num_steps_sampled: 7726908
    num_steps_trained: 77

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,773,122193,7726908,3.99766,10.69,-1.36,90.3063




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7736904
  custom_metrics: {}
  date: 2021-11-09_00-13-13
  done: false
  episode_len_mean: 90.95454545454545
  episode_media: {}
  episode_reward_max: 13.29000000000001
  episode_reward_mean: 3.9631818181818264
  episode_reward_min: -1.8400000000000012
  episodes_this_iter: 110
  episodes_total: 83793
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.017347782697433
          entropy_coeff: 0.01
          kl: 0.01232384988690923
          policy_loss: -0.05621152217619312
          total_loss: 0.1041709075984346
          vf_explained_var: 0.9323046207427979
          vf_loss: 0.15248063582942908
    num_agent_steps_sampled: 7736904
    num_agent_steps_trained: 7736904
    num_steps_sampled: 7736904
    num_steps_trained: 77369

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,774,122354,7736904,3.96318,13.29,-1.84,90.9545


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7746900
  custom_metrics: {}
  date: 2021-11-09_00-15-39
  done: false
  episode_len_mean: 91.30275229357798
  episode_media: {}
  episode_reward_max: 16.669999999999984
  episode_reward_mean: 4.178990825688083
  episode_reward_min: -1.280000000000001
  episodes_this_iter: 109
  episodes_total: 83902
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0102213330757923
          entropy_coeff: 0.01
          kl: 0.012146769337532962
          policy_loss: -0.056975124849595576
          total_loss: 0.12655989329656983
          vf_explained_var: 0.9276513457298279
          vf_loss: 0.17596537122487002
    num_agent_steps_sampled: 7746900
    num_agent_steps_trained: 7746900
    num_steps_sampled: 7746900
    num_steps_trained: 77

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,775,122500,7746900,4.17899,16.67,-1.28,91.3028




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7756896
  custom_metrics: {}
  date: 2021-11-09_00-18-20
  done: false
  episode_len_mean: 89.49107142857143
  episode_media: {}
  episode_reward_max: 12.350000000000012
  episode_reward_mean: 4.203482142857152
  episode_reward_min: -1.4200000000000006
  episodes_this_iter: 112
  episodes_total: 84014
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.02952579820258
          entropy_coeff: 0.01
          kl: 0.01173176110497023
          policy_loss: -0.05780504535342384
          total_loss: 0.13656244134514506
          vf_explained_var: 0.9298693537712097
          vf_loss: 0.18793632661621285
    num_agent_steps_sampled: 7756896
    num_agent_steps_trained: 7756896
    num_steps_sampled: 7756896
    num_steps_trained: 77568

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,776,122661,7756896,4.20348,12.35,-1.42,89.4911




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7766892
  custom_metrics: {}
  date: 2021-11-09_00-21-16
  done: false
  episode_len_mean: 88.60176991150442
  episode_media: {}
  episode_reward_max: 13.320000000000007
  episode_reward_mean: 4.579911504424788
  episode_reward_min: -0.8800000000000002
  episodes_this_iter: 113
  episodes_total: 84127
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 1.9653964303497575
          entropy_coeff: 0.01
          kl: 0.012202686299998433
          policy_loss: -0.0547027172250116
          total_loss: 0.12185719574395663
          vf_explained_var: 0.94972163438797
          vf_loss: 0.16841463164354747
    num_agent_steps_sampled: 7766892
    num_agent_steps_trained: 7766892
    num_steps_sampled: 7766892
    num_steps_trained: 77668

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,777,122837,7766892,4.57991,13.32,-0.88,88.6018




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7776888
  custom_metrics: {}
  date: 2021-11-09_00-23-58
  done: false
  episode_len_mean: 91.53211009174312
  episode_media: {}
  episode_reward_max: 13.30000000000001
  episode_reward_mean: 4.323302752293587
  episode_reward_min: -1.990000000000001
  episodes_this_iter: 109
  episodes_total: 84236
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.012614755243318
          entropy_coeff: 0.01
          kl: 0.012385679729474738
          policy_loss: -0.056519947554438545
          total_loss: 0.10975335488796362
          vf_explained_var: 0.938232421875
          vf_loss: 0.15818332358080353
    num_agent_steps_sampled: 7776888
    num_agent_steps_trained: 7776888
    num_steps_sampled: 7776888
    num_steps_trained: 7776888


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,778,122998,7776888,4.3233,13.3,-1.99,91.5321




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7786884
  custom_metrics: {}
  date: 2021-11-09_00-26-52
  done: false
  episode_len_mean: 90.85321100917432
  episode_media: {}
  episode_reward_max: 14.650000000000015
  episode_reward_mean: 4.4566055045871655
  episode_reward_min: -1.4300000000000006
  episodes_this_iter: 109
  episodes_total: 84345
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.027178911571829
          entropy_coeff: 0.01
          kl: 0.01253858328619344
          policy_loss: -0.05601272072770402
          total_loss: 0.1377234162794601
          vf_explained_var: 0.9360888004302979
          vf_loss: 0.18544346533683884
    num_agent_steps_sampled: 7786884
    num_agent_steps_trained: 7786884
    num_steps_sampled: 7786884
    num_steps_trained: 7786

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,779,123173,7786884,4.45661,14.65,-1.43,90.8532


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7796880
  custom_metrics: {}
  date: 2021-11-09_00-29-21
  done: false
  episode_len_mean: 91.51351351351352
  episode_media: {}
  episode_reward_max: 12.550000000000017
  episode_reward_mean: 4.097657657657666
  episode_reward_min: -1.7200000000000009
  episodes_this_iter: 111
  episodes_total: 84456
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0186223359189484
          entropy_coeff: 0.01
          kl: 0.012227365496066726
          policy_loss: -0.058506058068930084
          total_loss: 0.10025700200747094
          vf_explained_var: 0.9363325238227844
          vf_loss: 0.15109381627800883
    num_agent_steps_sampled: 7796880
    num_agent_steps_trained: 7796880
    num_steps_sampled: 7796880
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,780,123321,7796880,4.09766,12.55,-1.72,91.5135




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7806876
  custom_metrics: {}
  date: 2021-11-09_00-32-19
  done: false
  episode_len_mean: 90.35454545454546
  episode_media: {}
  episode_reward_max: 12.730000000000016
  episode_reward_mean: 4.313636363636373
  episode_reward_min: -1.800000000000001
  episodes_this_iter: 110
  episodes_total: 84566
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0067702802837406
          entropy_coeff: 0.01
          kl: 0.012390227947908677
          policy_loss: -0.05659697588501323
          total_loss: 0.11585970822976441
          vf_explained_var: 0.9332121014595032
          vf_loss: 0.1642978977603026
    num_agent_steps_sampled: 7806876
    num_agent_steps_trained: 7806876
    num_steps_sampled: 7806876
    num_steps_trained: 7806

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,781,123500,7806876,4.31364,12.73,-1.8,90.3545




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7816872
  custom_metrics: {}
  date: 2021-11-09_00-35-26
  done: false
  episode_len_mean: 90.10810810810811
  episode_media: {}
  episode_reward_max: 18.49999999999997
  episode_reward_mean: 4.3618018018018105
  episode_reward_min: -1.2400000000000007
  episodes_this_iter: 111
  episodes_total: 84677
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 1.9970376381507287
          entropy_coeff: 0.01
          kl: 0.012565094299493842
          policy_loss: -0.05363065102097825
          total_loss: 0.13345128210961946
          vf_explained_var: 0.9306148290634155
          vf_loss: 0.1784274528821946
    num_agent_steps_sampled: 7816872
    num_agent_steps_trained: 7816872
    num_steps_sampled: 7816872
    num_steps_trained: 781

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,782,123686,7816872,4.3618,18.5,-1.24,90.1081




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7826868
  custom_metrics: {}
  date: 2021-11-09_00-38-00
  done: false
  episode_len_mean: 90.88181818181818
  episode_media: {}
  episode_reward_max: 14.99000000000001
  episode_reward_mean: 4.901727272727283
  episode_reward_min: -1.560000000000001
  episodes_this_iter: 110
  episodes_total: 84787
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 1.9950338738596338
          entropy_coeff: 0.01
          kl: 0.012820110205815267
          policy_loss: -0.05197588262012881
          total_loss: 0.14703358564780564
          vf_explained_var: 0.9519028067588806
          vf_loss: 0.1897539929845012
    num_agent_steps_sampled: 7826868
    num_agent_steps_trained: 7826868
    num_steps_sampled: 7826868
    num_steps_trained: 78268

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,783,123840,7826868,4.90173,14.99,-1.56,90.8818




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7836864
  custom_metrics: {}
  date: 2021-11-09_00-40-50
  done: false
  episode_len_mean: 88.61061946902655
  episode_media: {}
  episode_reward_max: 16.359999999999946
  episode_reward_mean: 4.863185840707973
  episode_reward_min: -1.4400000000000006
  episodes_this_iter: 113
  episodes_total: 84900
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 1.986476402506869
          entropy_coeff: 0.01
          kl: 0.012310420974892057
          policy_loss: -0.05512069932017953
          total_loss: 0.11444053056403103
          vf_explained_var: 0.9503492116928101
          vf_loss: 0.16138131422205612
    num_agent_steps_sampled: 7836864
    num_agent_steps_trained: 7836864
    num_steps_sampled: 7836864
    num_steps_trained: 783

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,784,124010,7836864,4.86319,16.36,-1.44,88.6106




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7846860
  custom_metrics: {}
  date: 2021-11-09_00-43-27
  done: false
  episode_len_mean: 91.2090909090909
  episode_media: {}
  episode_reward_max: 18.36999999999997
  episode_reward_mean: 4.720727272727282
  episode_reward_min: -1.2800000000000007
  episodes_this_iter: 110
  episodes_total: 85010
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0132480124123076
          entropy_coeff: 0.01
          kl: 0.012736200428297215
          policy_loss: -0.05291271619339529
          total_loss: 0.12889571147055454
          vf_explained_var: 0.9463139176368713
          vf_loss: 0.17292625014789595
    num_agent_steps_sampled: 7846860
    num_agent_steps_trained: 7846860
    num_steps_sampled: 7846860
    num_steps_trained: 7846

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,785,124168,7846860,4.72073,18.37,-1.28,91.2091




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7856856
  custom_metrics: {}
  date: 2021-11-09_00-46-24
  done: false
  episode_len_mean: 88.87387387387388
  episode_media: {}
  episode_reward_max: 16.42999999999995
  episode_reward_mean: 5.065225225225234
  episode_reward_min: -1.5000000000000007
  episodes_this_iter: 111
  episodes_total: 85121
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 1.9900626473956637
          entropy_coeff: 0.01
          kl: 0.012620450885116624
          policy_loss: -0.05325275937365925
          total_loss: 0.12053314020904975
          vf_explained_var: 0.9512946009635925
          vf_loss: 0.16493555986855785
    num_agent_steps_sampled: 7856856
    num_agent_steps_trained: 7856856
    num_steps_sampled: 7856856
    num_steps_trained: 785

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,786,124344,7856856,5.06523,16.43,-1.5,88.8739




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7866852
  custom_metrics: {}
  date: 2021-11-09_00-49-04
  done: false
  episode_len_mean: 92.47222222222223
  episode_media: {}
  episode_reward_max: 14.690000000000017
  episode_reward_mean: 4.720833333333342
  episode_reward_min: -0.8600000000000005
  episodes_this_iter: 108
  episodes_total: 85229
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 1.9788467737344595
          entropy_coeff: 0.01
          kl: 0.012965881688553655
          policy_loss: -0.05508384148064905
          total_loss: 0.16103576912234227
          vf_explained_var: 0.9410158395767212
          vf_loss: 0.20637017876610286
    num_agent_steps_sampled: 7866852
    num_agent_steps_trained: 7866852
    num_steps_sampled: 7866852
    num_steps_trained: 78

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,787,124504,7866852,4.72083,14.69,-0.86,92.4722




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7876848
  custom_metrics: {}
  date: 2021-11-09_00-51-59
  done: false
  episode_len_mean: 88.91228070175438
  episode_media: {}
  episode_reward_max: 12.930000000000014
  episode_reward_mean: 4.407631578947378
  episode_reward_min: -1.3200000000000007
  episodes_this_iter: 114
  episodes_total: 85343
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.001951375985757
          entropy_coeff: 0.01
          kl: 0.01172971176940043
          policy_loss: -0.0534315346294425
          total_loss: 0.09537338054715058
          vf_explained_var: 0.9507599472999573
          vf_loss: 0.1421026787155459
    num_agent_steps_sampled: 7876848
    num_agent_steps_trained: 7876848
    num_steps_sampled: 7876848
    num_steps_trained: 787684

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,788,124679,7876848,4.40763,12.93,-1.32,88.9123


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7886844
  custom_metrics: {}
  date: 2021-11-09_00-54-27
  done: false
  episode_len_mean: 92.98130841121495
  episode_media: {}
  episode_reward_max: 14.99000000000001
  episode_reward_mean: 4.859252336448609
  episode_reward_min: -1.6100000000000005
  episodes_this_iter: 107
  episodes_total: 85450
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 1.9939552450791382
          entropy_coeff: 0.01
          kl: 0.013611601124160204
          policy_loss: -0.05475129032205058
          total_loss: 0.11674576989319335
          vf_explained_var: 0.9451492428779602
          vf_loss: 0.16042768193138207
    num_agent_steps_sampled: 7886844
    num_agent_steps_trained: 7886844
    num_steps_sampled: 7886844
    num_steps_trained: 788

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,789,124827,7886844,4.85925,14.99,-1.61,92.9813




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7896840
  custom_metrics: {}
  date: 2021-11-09_00-57-24
  done: false
  episode_len_mean: 91.96330275229357
  episode_media: {}
  episode_reward_max: 16.78999999999999
  episode_reward_mean: 4.6933944954128535
  episode_reward_min: -1.3200000000000003
  episodes_this_iter: 109
  episodes_total: 85559
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.020534381805322
          entropy_coeff: 0.01
          kl: 0.012807742415573899
          policy_loss: -0.057803545229168786
          total_loss: 0.11557838577403026
          vf_explained_var: 0.9477292895317078
          vf_loss: 0.1644096353958942
    num_agent_steps_sampled: 7896840
    num_agent_steps_trained: 7896840
    num_steps_sampled: 7896840
    num_steps_trained: 789

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,790,125004,7896840,4.69339,16.79,-1.32,91.9633




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7906836
  custom_metrics: {}
  date: 2021-11-09_01-00-02
  done: false
  episode_len_mean: 90.8
  episode_media: {}
  episode_reward_max: 16.42
  episode_reward_mean: 4.253454545454555
  episode_reward_min: -1.2600000000000002
  episodes_this_iter: 110
  episodes_total: 85669
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 1.9812031995536934
          entropy_coeff: 0.01
          kl: 0.01240801552649696
          policy_loss: -0.05730794610407872
          total_loss: 0.10174684092784539
          vf_explained_var: 0.9382365942001343
          vf_loss: 0.15059980815674504
    num_agent_steps_sampled: 7906836
    num_agent_steps_trained: 7906836
    num_steps_sampled: 7906836
    num_steps_trained: 7906836
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,791,125162,7906836,4.25345,16.42,-1.26,90.8




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7916832
  custom_metrics: {}
  date: 2021-11-09_01-03-10
  done: false
  episode_len_mean: 89.65765765765765
  episode_media: {}
  episode_reward_max: 14.120000000000019
  episode_reward_mean: 3.9206306306306393
  episode_reward_min: -1.3600000000000003
  episodes_this_iter: 111
  episodes_total: 85780
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 1.9995646299459995
          entropy_coeff: 0.01
          kl: 0.011630307787598434
          policy_loss: -0.057101776727881186
          total_loss: 0.11022548716133222
          vf_explained_var: 0.9333921670913696
          vf_loss: 0.16082761405179133
    num_agent_steps_sampled: 7916832
    num_agent_steps_trained: 7916832
    num_steps_sampled: 7916832
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,792,125350,7916832,3.92063,14.12,-1.36,89.6577




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7926828
  custom_metrics: {}
  date: 2021-11-09_01-06-07
  done: false
  episode_len_mean: 88.90178571428571
  episode_media: {}
  episode_reward_max: 11.030000000000008
  episode_reward_mean: 4.658392857142866
  episode_reward_min: -1.3900000000000003
  episodes_this_iter: 112
  episodes_total: 85892
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 1.9999267075815772
          entropy_coeff: 0.01
          kl: 0.012346717479361275
          policy_loss: -0.0593889148078031
          total_loss: 0.10856600021545448
          vf_explained_var: 0.9419257044792175
          vf_loss: 0.15982681467619717
    num_agent_steps_sampled: 7926828
    num_agent_steps_trained: 7926828
    num_steps_sampled: 7926828
    num_steps_trained: 792

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,793,125527,7926828,4.65839,11.03,-1.39,88.9018




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7936824
  custom_metrics: {}
  date: 2021-11-09_01-08-48
  done: false
  episode_len_mean: 93.81308411214954
  episode_media: {}
  episode_reward_max: 18.519999999999957
  episode_reward_mean: 4.9104672897196355
  episode_reward_min: -0.7700000000000006
  episodes_this_iter: 107
  episodes_total: 85999
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0214968568239455
          entropy_coeff: 0.01
          kl: 0.012423315521805329
          policy_loss: -0.056320161467943435
          total_loss: 0.12205852142845591
          vf_explained_var: 0.9484840035438538
          vf_loss: 0.17029178524947064
    num_agent_steps_sampled: 7936824
    num_agent_steps_trained: 7936824
    num_steps_sampled: 7936824
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,794,125688,7936824,4.91047,18.52,-0.77,93.8131




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7946820
  custom_metrics: {}
  date: 2021-11-09_01-11-34
  done: false
  episode_len_mean: 91.41284403669725
  episode_media: {}
  episode_reward_max: 12.610000000000014
  episode_reward_mean: 4.060550458715605
  episode_reward_min: -2.219999999999999
  episodes_this_iter: 109
  episodes_total: 86108
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.055725995190123
          entropy_coeff: 0.01
          kl: 0.012146521740322635
          policy_loss: -0.05854442850328409
          total_loss: 0.0977110077889684
          vf_explained_var: 0.9352782964706421
          vf_loss: 0.14914140108431506
    num_agent_steps_sampled: 7946820
    num_agent_steps_trained: 7946820
    num_steps_sampled: 7946820
    num_steps_trained: 79468

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,795,125854,7946820,4.06055,12.61,-2.22,91.4128




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7956816
  custom_metrics: {}
  date: 2021-11-09_01-14-28
  done: false
  episode_len_mean: 91.77981651376147
  episode_media: {}
  episode_reward_max: 12.360000000000015
  episode_reward_mean: 4.121009174311937
  episode_reward_min: -0.46000000000000085
  episodes_this_iter: 109
  episodes_total: 86217
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0540742854786735
          entropy_coeff: 0.01
          kl: 0.011299953569556006
          policy_loss: -0.05997214243452773
          total_loss: 0.0904823470621919
          vf_explained_var: 0.9463887810707092
          vf_loss: 0.14525252419691057
    num_agent_steps_sampled: 7956816
    num_agent_steps_trained: 7956816
    num_steps_sampled: 7956816
    num_steps_trained: 79

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,796,126028,7956816,4.12101,12.36,-0.46,91.7798




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7966812
  custom_metrics: {}
  date: 2021-11-09_01-17-09
  done: false
  episode_len_mean: 90.91818181818182
  episode_media: {}
  episode_reward_max: 11.98000000000002
  episode_reward_mean: 4.001727272727282
  episode_reward_min: -1.9300000000000008
  episodes_this_iter: 110
  episodes_total: 86327
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.020423078129434
          entropy_coeff: 0.01
          kl: 0.01142316389677771
          policy_loss: -0.05978890828406199
          total_loss: 0.10257357495367272
          vf_explained_var: 0.9310057759284973
          vf_loss: 0.15654331618585648
    num_agent_steps_sampled: 7966812
    num_agent_steps_trained: 7966812
    num_steps_sampled: 7966812
    num_steps_trained: 79668

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,797,126189,7966812,4.00173,11.98,-1.93,90.9182


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7976808
  custom_metrics: {}
  date: 2021-11-09_01-19-31
  done: false
  episode_len_mean: 92.31481481481481
  episode_media: {}
  episode_reward_max: 10.490000000000014
  episode_reward_mean: 4.035277777777788
  episode_reward_min: -1.7100000000000006
  episodes_this_iter: 108
  episodes_total: 86435
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0502053203745785
          entropy_coeff: 0.01
          kl: 0.01175738231778927
          policy_loss: -0.05776225894004998
          total_loss: 0.10238894659420873
          vf_explained_var: 0.9335981607437134
          vf_loss: 0.15386847061956796
    num_agent_steps_sampled: 7976808
    num_agent_steps_trained: 7976808
    num_steps_sampled: 7976808
    num_steps_trained: 797

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,798,126331,7976808,4.03528,10.49,-1.71,92.3148




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7986804
  custom_metrics: {}
  date: 2021-11-09_01-22-09
  done: false
  episode_len_mean: 92.44444444444444
  episode_media: {}
  episode_reward_max: 12.780000000000017
  episode_reward_mean: 4.570925925925936
  episode_reward_min: -1.6800000000000006
  episodes_this_iter: 108
  episodes_total: 86543
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.02771276795966
          entropy_coeff: 0.01
          kl: 0.01259142573746063
          policy_loss: -0.05655530881391377
          total_loss: 0.1174073038670497
          vf_explained_var: 0.9496863484382629
          vf_loss: 0.16555489842167012
    num_agent_steps_sampled: 7986804
    num_agent_steps_trained: 7986804
    num_steps_sampled: 7986804
    num_steps_trained: 798680

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,799,126488,7986804,4.57093,12.78,-1.68,92.4444




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 7996800
  custom_metrics: {}
  date: 2021-11-09_01-25-02
  done: false
  episode_len_mean: 89.41592920353982
  episode_media: {}
  episode_reward_max: 14.050000000000015
  episode_reward_mean: 4.506814159292046
  episode_reward_min: -1.790000000000001
  episodes_this_iter: 113
  episodes_total: 86656
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0414497325563024
          entropy_coeff: 0.01
          kl: 0.012076241807845597
          policy_loss: -0.05604463603793301
          total_loss: 0.11866835681243967
          vf_explained_var: 0.941871166229248
          vf_loss: 0.16761630139130557
    num_agent_steps_sampled: 7996800
    num_agent_steps_trained: 7996800
    num_steps_sampled: 7996800
    num_steps_trained: 7996

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,800,126661,7996800,4.50681,14.05,-1.79,89.4159


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8006796
  custom_metrics: {}
  date: 2021-11-09_01-27-35
  done: false
  episode_len_mean: 91.46296296296296
  episode_media: {}
  episode_reward_max: 14.290000000000019
  episode_reward_mean: 4.342870370370379
  episode_reward_min: -1.4100000000000008
  episodes_this_iter: 108
  episodes_total: 86764
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0260860315754883
          entropy_coeff: 0.01
          kl: 0.012008649170930157
          policy_loss: -0.058367310443686114
          total_loss: 0.08674948759790924
          vf_explained_var: 0.9476712942123413
          vf_loss: 0.13802045189226286
    num_agent_steps_sampled: 8006796
    num_agent_steps_trained: 8006796
    num_steps_sampled: 8006796
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,801,126815,8006796,4.34287,14.29,-1.41,91.463




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8016792
  custom_metrics: {}
  date: 2021-11-09_01-30-19
  done: false
  episode_len_mean: 89.28947368421052
  episode_media: {}
  episode_reward_max: 14.840000000000018
  episode_reward_mean: 4.745877192982467
  episode_reward_min: -1.4400000000000008
  episodes_this_iter: 114
  episodes_total: 86878
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 1.970775547496274
          entropy_coeff: 0.01
          kl: 0.01271554375710174
          policy_loss: -0.05313920229832586
          total_loss: 0.14739533840463712
          vf_explained_var: 0.9505671858787537
          vf_loss: 0.19127469704223748
    num_agent_steps_sampled: 8016792
    num_agent_steps_trained: 8016792
    num_steps_sampled: 8016792
    num_steps_trained: 8016

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,802,126978,8016792,4.74588,14.84,-1.44,89.2895




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8026788
  custom_metrics: {}
  date: 2021-11-09_01-32-58
  done: false
  episode_len_mean: 91.3425925925926
  episode_media: {}
  episode_reward_max: 14.650000000000016
  episode_reward_mean: 4.4969444444444555
  episode_reward_min: -1.6800000000000008
  episodes_this_iter: 108
  episodes_total: 86986
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.008252569549104
          entropy_coeff: 0.01
          kl: 0.01169635777195503
          policy_loss: -0.05635243283314073
          total_loss: 0.10756384110412537
          vf_explained_var: 0.9505149722099304
          vf_loss: 0.15735303443084414
    num_agent_steps_sampled: 8026788
    num_agent_steps_trained: 8026788
    num_steps_sampled: 8026788
    num_steps_trained: 8026

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,803,127137,8026788,4.49694,14.65,-1.68,91.3426


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8036784
  custom_metrics: {}
  date: 2021-11-09_01-35-24
  done: false
  episode_len_mean: 89.91964285714286
  episode_media: {}
  episode_reward_max: 16.31999999999994
  episode_reward_mean: 4.2353571428571515
  episode_reward_min: -1.7400000000000007
  episodes_this_iter: 112
  episodes_total: 87098
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 1.973890263402564
          entropy_coeff: 0.01
          kl: 0.01246785067680229
          policy_loss: -0.05587807329785493
          total_loss: 0.13453116007149218
          vf_explained_var: 0.9399173855781555
          vf_loss: 0.18174481343669005
    num_agent_steps_sampled: 8036784
    num_agent_steps_trained: 8036784
    num_steps_sampled: 8036784
    num_steps_trained: 8036

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,804,127283,8036784,4.23536,16.32,-1.74,89.9196




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8046780
  custom_metrics: {}
  date: 2021-11-09_01-38-04
  done: false
  episode_len_mean: 89.76576576576576
  episode_media: {}
  episode_reward_max: 12.620000000000017
  episode_reward_mean: 4.253963963963974
  episode_reward_min: -1.4500000000000006
  episodes_this_iter: 111
  episodes_total: 87209
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0273294683195586
          entropy_coeff: 0.01
          kl: 0.011204341165950675
          policy_loss: -0.059660505545604176
          total_loss: 0.09554625122099478
          vf_explained_var: 0.9455888867378235
          vf_loss: 0.14995516019308158
    num_agent_steps_sampled: 8046780
    num_agent_steps_trained: 8046780
    num_steps_sampled: 8046780
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,805,127444,8046780,4.25396,12.62,-1.45,89.7658




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8056776
  custom_metrics: {}
  date: 2021-11-09_01-40-45
  done: false
  episode_len_mean: 91.00909090909092
  episode_media: {}
  episode_reward_max: 18.17999999999993
  episode_reward_mean: 4.542181818181828
  episode_reward_min: -1.910000000000001
  episodes_this_iter: 110
  episodes_total: 87319
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.024103043935238
          entropy_coeff: 0.01
          kl: 0.012342249045161919
          policy_loss: -0.05637266150015032
          total_loss: 0.12191749083148873
          vf_explained_var: 0.9373980760574341
          vf_loss: 0.17041399601496693
    num_agent_steps_sampled: 8056776
    num_agent_steps_trained: 8056776
    num_steps_sampled: 8056776
    num_steps_trained: 80567

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,806,127604,8056776,4.54218,18.18,-1.91,91.0091


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8066772
  custom_metrics: {}
  date: 2021-11-09_01-43-13
  done: false
  episode_len_mean: 90.52727272727273
  episode_media: {}
  episode_reward_max: 14.940000000000017
  episode_reward_mean: 4.35409090909092
  episode_reward_min: -1.6500000000000006
  episodes_this_iter: 110
  episodes_total: 87429
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 1.9888786440221673
          entropy_coeff: 0.01
          kl: 0.011701755832419199
          policy_loss: -0.05764001670779071
          total_loss: 0.11398042766934531
          vf_explained_var: 0.9474801421165466
          vf_loss: 0.16485116795087473
    num_agent_steps_sampled: 8066772
    num_agent_steps_trained: 8066772
    num_steps_sampled: 8066772
    num_steps_trained: 806

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,807,127752,8066772,4.35409,14.94,-1.65,90.5273




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8076768
  custom_metrics: {}
  date: 2021-11-09_01-45-59
  done: false
  episode_len_mean: 89.36607142857143
  episode_media: {}
  episode_reward_max: 14.740000000000016
  episode_reward_mean: 4.629375000000009
  episode_reward_min: -1.1000000000000005
  episodes_this_iter: 112
  episodes_total: 87541
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0336488443562106
          entropy_coeff: 0.01
          kl: 0.012687737448624935
          policy_loss: -0.056335755962973987
          total_loss: 0.14350827496785384
          vf_explained_var: 0.9464468359947205
          vf_loss: 0.1912762668270331
    num_agent_steps_sampled: 8076768
    num_agent_steps_trained: 8076768
    num_steps_sampled: 8076768
    num_steps_trained: 80

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,808,127918,8076768,4.62938,14.74,-1.1,89.3661




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8086764
  custom_metrics: {}
  date: 2021-11-09_01-48-49
  done: false
  episode_len_mean: 90.9
  episode_media: {}
  episode_reward_max: 12.570000000000016
  episode_reward_mean: 4.547636363636373
  episode_reward_min: -1.7300000000000009
  episodes_this_iter: 110
  episodes_total: 87651
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0162664024238914
          entropy_coeff: 0.01
          kl: 0.012260150591946389
          policy_loss: -0.05441039559455254
          total_loss: 0.13767156993898635
          vf_explained_var: 0.9256844520568848
          vf_loss: 0.18431447219326455
    num_agent_steps_sampled: 8086764
    num_agent_steps_trained: 8086764
    num_steps_sampled: 8086764
    num_steps_trained: 8086764
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,809,128088,8086764,4.54764,12.57,-1.73,90.9




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8096760
  custom_metrics: {}
  date: 2021-11-09_01-51-55
  done: false
  episode_len_mean: 87.929203539823
  episode_media: {}
  episode_reward_max: 16.569999999999947
  episode_reward_mean: 4.755752212389389
  episode_reward_min: -1.540000000000001
  episodes_this_iter: 113
  episodes_total: 87764
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 1.974078028731876
          entropy_coeff: 0.01
          kl: 0.01263400463403214
          policy_loss: -0.05382683118884889
          total_loss: 0.14369974146063766
          vf_explained_var: 0.9367064237594604
          vf_loss: 0.18848551051993656
    num_agent_steps_sampled: 8096760
    num_agent_steps_trained: 8096760
    num_steps_sampled: 8096760
    num_steps_trained: 8096760

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,810,128275,8096760,4.75575,16.57,-1.54,87.9292


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8106756
  custom_metrics: {}
  date: 2021-11-09_01-54-24
  done: false
  episode_len_mean: 89.69642857142857
  episode_media: {}
  episode_reward_max: 10.890000000000013
  episode_reward_mean: 4.145803571428582
  episode_reward_min: -1.1500000000000006
  episodes_this_iter: 112
  episodes_total: 87876
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 1.9995857533226666
          entropy_coeff: 0.01
          kl: 0.011198987674995683
          policy_loss: -0.05911794443511301
          total_loss: 0.0923683694555846
          vf_explained_var: 0.9528408050537109
          vf_loss: 0.14596947607480817
    num_agent_steps_sampled: 8106756
    num_agent_steps_trained: 8106756
    num_steps_sampled: 8106756
    num_steps_trained: 810

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,811,128424,8106756,4.1458,10.89,-1.15,89.6964




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8116752
  custom_metrics: {}
  date: 2021-11-09_01-57-05
  done: false
  episode_len_mean: 89.22321428571429
  episode_media: {}
  episode_reward_max: 20.45999999999995
  episode_reward_mean: 4.706607142857152
  episode_reward_min: -1.5000000000000007
  episodes_this_iter: 112
  episodes_total: 87988
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 1.998280931232322
          entropy_coeff: 0.01
          kl: 0.013417131639880257
          policy_loss: -0.05403777039092448
          total_loss: 0.1571687808371762
          vf_explained_var: 0.9237889051437378
          vf_loss: 0.20062345611966317
    num_agent_steps_sampled: 8116752
    num_agent_steps_trained: 8116752
    num_steps_sampled: 8116752
    num_steps_trained: 81167

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,812,128585,8116752,4.70661,20.46,-1.5,89.2232




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8126748
  custom_metrics: {}
  date: 2021-11-09_02-00-07
  done: false
  episode_len_mean: 87.57894736842105
  episode_media: {}
  episode_reward_max: 18.369999999999926
  episode_reward_mean: 4.825526315789482
  episode_reward_min: -1.6700000000000013
  episodes_this_iter: 114
  episodes_total: 88102
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 1.9866559831505148
          entropy_coeff: 0.01
          kl: 0.011654131318660802
          policy_loss: -0.05582420982929886
          total_loss: 0.12436513473542454
          vf_explained_var: 0.9504060745239258
          vf_loss: 0.17350633564508625
    num_agent_steps_sampled: 8126748
    num_agent_steps_trained: 8126748
    num_steps_sampled: 8126748
    num_steps_trained: 81

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,813,128766,8126748,4.82553,18.37,-1.67,87.5789




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8136744
  custom_metrics: {}
  date: 2021-11-09_02-02-52
  done: false
  episode_len_mean: 86.66956521739131
  episode_media: {}
  episode_reward_max: 14.620000000000012
  episode_reward_mean: 3.9881739130434877
  episode_reward_min: -1.4300000000000006
  episodes_this_iter: 115
  episodes_total: 88217
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0110391904146243
          entropy_coeff: 0.01
          kl: 0.010992602044271854
          policy_loss: -0.05535362901595923
          total_loss: 0.08267939726814118
          vf_explained_var: 0.9471849799156189
          vf_loss: 0.13310089494045982
    num_agent_steps_sampled: 8136744
    num_agent_steps_trained: 8136744
    num_steps_sampled: 8136744
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,814,128931,8136744,3.98817,14.62,-1.43,86.6696


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8146740
  custom_metrics: {}
  date: 2021-11-09_02-05-22
  done: false
  episode_len_mean: 90.27027027027027
  episode_media: {}
  episode_reward_max: 15.039999999999994
  episode_reward_mean: 5.346756756756769
  episode_reward_min: -1.5400000000000007
  episodes_this_iter: 111
  episodes_total: 88328
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 1.9669061289893257
          entropy_coeff: 0.01
          kl: 0.011958902861468947
          policy_loss: -0.05622205276074063
          total_loss: 0.0980270867005118
          vf_explained_var: 0.958682656288147
          vf_loss: 0.1466743245243262
    num_agent_steps_sampled: 8146740
    num_agent_steps_trained: 8146740
    num_steps_sampled: 8146740
    num_steps_trained: 81467

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,815,129081,8146740,5.34676,15.04,-1.54,90.2703




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8156736
  custom_metrics: {}
  date: 2021-11-09_02-08-17
  done: false
  episode_len_mean: 88.46902654867256
  episode_media: {}
  episode_reward_max: 12.550000000000013
  episode_reward_mean: 4.457522123893815
  episode_reward_min: -1.4000000000000008
  episodes_this_iter: 113
  episodes_total: 88441
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.00633488414634
          entropy_coeff: 0.01
          kl: 0.012531476352206632
          policy_loss: -0.05340250223620325
          total_loss: 0.11610962242827329
          vf_explained_var: 0.9483603835105896
          vf_loss: 0.16102720193612652
    num_agent_steps_sampled: 8156736
    num_agent_steps_trained: 8156736
    num_steps_sampled: 8156736
    num_steps_trained: 8156

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,816,129256,8156736,4.45752,12.55,-1.4,88.469




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8166732
  custom_metrics: {}
  date: 2021-11-09_02-11-13
  done: false
  episode_len_mean: 90.48648648648648
  episode_media: {}
  episode_reward_max: 16.37999999999997
  episode_reward_mean: 4.612612612612623
  episode_reward_min: -1.0700000000000005
  episodes_this_iter: 111
  episodes_total: 88552
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.024192133418515
          entropy_coeff: 0.01
          kl: 0.011168322929612413
          policy_loss: -0.0591984401552532
          total_loss: 0.09997132720951087
          vf_explained_var: 0.9531886577606201
          vf_loss: 0.15396885266925533
    num_agent_steps_sampled: 8166732
    num_agent_steps_trained: 8166732
    num_steps_sampled: 8166732
    num_steps_trained: 81667

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,817,129432,8166732,4.61261,16.38,-1.07,90.4865




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8176728
  custom_metrics: {}
  date: 2021-11-09_02-13-52
  done: false
  episode_len_mean: 88.20353982300885
  episode_media: {}
  episode_reward_max: 12.770000000000014
  episode_reward_mean: 4.695132743362842
  episode_reward_min: -1.3600000000000005
  episodes_this_iter: 113
  episodes_total: 88665
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0053594335531577
          entropy_coeff: 0.01
          kl: 0.012288864708889749
          policy_loss: -0.05573397155564565
          total_loss: 0.10295007531172955
          vf_explained_var: 0.9522085189819336
          vf_loss: 0.15074206980534344
    num_agent_steps_sampled: 8176728
    num_agent_steps_trained: 8176728
    num_steps_sampled: 8176728
    num_steps_trained: 81

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,818,129591,8176728,4.69513,12.77,-1.36,88.2035




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8186724
  custom_metrics: {}
  date: 2021-11-09_02-16-41
  done: false
  episode_len_mean: 89.05405405405405
  episode_media: {}
  episode_reward_max: 12.680000000000016
  episode_reward_mean: 4.312072072072081
  episode_reward_min: -2.14
  episodes_this_iter: 111
  episodes_total: 88776
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 1.999515148717114
          entropy_coeff: 0.01
          kl: 0.011814228881269739
          policy_loss: -0.05427690441919188
          total_loss: 0.10189257453824592
          vf_explained_var: 0.9493074417114258
          vf_loss: 0.14925034020701025
    num_agent_steps_sampled: 8186724
    num_agent_steps_trained: 8186724
    num_steps_sampled: 8186724
    num_steps_trained: 8186724
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,819,129759,8186724,4.31207,12.68,-2.14,89.0541




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8196720
  custom_metrics: {}
  date: 2021-11-09_02-19-35
  done: false
  episode_len_mean: 89.17699115044248
  episode_media: {}
  episode_reward_max: 18.13999999999993
  episode_reward_mean: 4.93008849557523
  episode_reward_min: -1.6200000000000012
  episodes_this_iter: 113
  episodes_total: 88889
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0298830561148815
          entropy_coeff: 0.01
          kl: 0.012871605226314371
          policy_loss: -0.05454446794067183
          total_loss: 0.11793348643307884
          vf_explained_var: 0.9539872407913208
          vf_loss: 0.16345365786781677
    num_agent_steps_sampled: 8196720
    num_agent_steps_trained: 8196720
    num_steps_sampled: 8196720
    num_steps_trained: 8196

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,820,129934,8196720,4.93009,18.14,-1.62,89.177




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8206716
  custom_metrics: {}
  date: 2021-11-09_02-22-14
  done: false
  episode_len_mean: 89.5
  episode_media: {}
  episode_reward_max: 12.250000000000018
  episode_reward_mean: 5.17544642857144
  episode_reward_min: -1.4100000000000006
  episodes_this_iter: 112
  episodes_total: 89001
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0032986124356587
          entropy_coeff: 0.01
          kl: 0.012909319454248355
          policy_loss: -0.053570504661681305
          total_loss: 0.1376221955132981
          vf_explained_var: 0.9496254324913025
          vf_loss: 0.18181664259929178
    num_agent_steps_sampled: 8206716
    num_agent_steps_trained: 8206716
    num_steps_sampled: 8206716
    num_steps_trained: 8206716
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,821,130093,8206716,5.17545,12.25,-1.41,89.5


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8216712
  custom_metrics: {}
  date: 2021-11-09_02-24-45
  done: false
  episode_len_mean: 87.46491228070175
  episode_media: {}
  episode_reward_max: 12.930000000000012
  episode_reward_mean: 4.801754385964921
  episode_reward_min: -1.950000000000001
  episodes_this_iter: 114
  episodes_total: 89115
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 1.9972474429342482
          entropy_coeff: 0.01
          kl: 0.011784482897517193
          policy_loss: -0.05647795741470196
          total_loss: 0.11393331580468986
          vf_explained_var: 0.9528455138206482
          vf_loss: 0.16353722207853172
    num_agent_steps_sampled: 8216712
    num_agent_steps_trained: 8216712
    num_steps_sampled: 8216712
    num_steps_trained: 821

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,822,130244,8216712,4.80175,12.93,-1.95,87.4649




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8226708
  custom_metrics: {}
  date: 2021-11-09_02-27-54
  done: false
  episode_len_mean: 88.20353982300885
  episode_media: {}
  episode_reward_max: 10.750000000000018
  episode_reward_mean: 4.563185840707974
  episode_reward_min: -1.7600000000000007
  episodes_this_iter: 113
  episodes_total: 89228
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0059926002453534
          entropy_coeff: 0.01
          kl: 0.012204271602577565
          policy_loss: -0.054213522393734025
          total_loss: 0.12198969945470747
          vf_explained_var: 0.949812114238739
          vf_loss: 0.16846029217012673
    num_agent_steps_sampled: 8226708
    num_agent_steps_trained: 8226708
    num_steps_sampled: 8226708
    num_steps_trained: 82

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,823,130432,8226708,4.56319,10.75,-1.76,88.2035




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8236704
  custom_metrics: {}
  date: 2021-11-09_02-30-44
  done: false
  episode_len_mean: 88.56637168141593
  episode_media: {}
  episode_reward_max: 12.900000000000016
  episode_reward_mean: 4.361592920353992
  episode_reward_min: -1.320000000000001
  episodes_this_iter: 113
  episodes_total: 89341
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.033000546133416
          entropy_coeff: 0.01
          kl: 0.011487110772446634
          policy_loss: -0.05708630595308466
          total_loss: 0.09646931735830556
          vf_explained_var: 0.9513277411460876
          vf_loss: 0.1477165522149358
    num_agent_steps_sampled: 8236704
    num_agent_steps_trained: 8236704
    num_steps_sampled: 8236704
    num_steps_trained: 82367

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,824,130602,8236704,4.36159,12.9,-1.32,88.5664


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8246700
  custom_metrics: {}
  date: 2021-11-09_02-33-11
  done: false
  episode_len_mean: 89.35398230088495
  episode_media: {}
  episode_reward_max: 14.570000000000014
  episode_reward_mean: 4.408053097345142
  episode_reward_min: -1.1700000000000006
  episodes_this_iter: 113
  episodes_total: 89454
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0192937332340795
          entropy_coeff: 0.01
          kl: 0.011551227777265805
          policy_loss: -0.058705697972805074
          total_loss: 0.08852769394015145
          vf_explained_var: 0.9419835209846497
          vf_loss: 0.1411111870302986
    num_agent_steps_sampled: 8246700
    num_agent_steps_trained: 8246700
    num_steps_sampled: 8246700
    num_steps_trained: 82

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,825,130750,8246700,4.40805,14.57,-1.17,89.354




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8256696
  custom_metrics: {}
  date: 2021-11-09_02-35-53
  done: false
  episode_len_mean: 88.66964285714286
  episode_media: {}
  episode_reward_max: 18.14999999999993
  episode_reward_mean: 4.69455357142858
  episode_reward_min: -1.3400000000000003
  episodes_this_iter: 112
  episodes_total: 89566
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0339315420542006
          entropy_coeff: 0.01
          kl: 0.01241780740456788
          policy_loss: -0.055576244708246146
          total_loss: 0.13499543519021992
          vf_explained_var: 0.9432545304298401
          vf_loss: 0.18262167585551994
    num_agent_steps_sampled: 8256696
    num_agent_steps_trained: 8256696
    num_steps_sampled: 8256696
    num_steps_trained: 8256

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,826,130912,8256696,4.69455,18.15,-1.34,88.6696




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8266692
  custom_metrics: {}
  date: 2021-11-09_02-38-37
  done: false
  episode_len_mean: 86.99130434782609
  episode_media: {}
  episode_reward_max: 14.780000000000014
  episode_reward_mean: 4.499304347826096
  episode_reward_min: -1.1700000000000008
  episodes_this_iter: 115
  episodes_total: 89681
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.022734294182215
          entropy_coeff: 0.01
          kl: 0.01157495658835197
          policy_loss: -0.0574080794556146
          total_loss: 0.0914863152596622
          vf_explained_var: 0.9453955292701721
          vf_loss: 0.14275253960846837
    num_agent_steps_sampled: 8266692
    num_agent_steps_trained: 8266692
    num_steps_sampled: 8266692
    num_steps_trained: 826669

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,827,131076,8266692,4.4993,14.78,-1.17,86.9913




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8276688
  custom_metrics: {}
  date: 2021-11-09_02-41-15
  done: false
  episode_len_mean: 89.83783783783784
  episode_media: {}
  episode_reward_max: 12.300000000000017
  episode_reward_mean: 4.718468468468479
  episode_reward_min: -0.3999999999999999
  episodes_this_iter: 111
  episodes_total: 89792
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0217610138094324
          entropy_coeff: 0.01
          kl: 0.012418595053688494
          policy_loss: -0.0596304470876184
          total_loss: 0.12068885410857251
          vf_explained_var: 0.9460579752922058
          vf_loss: 0.17224579890871533
    num_agent_steps_sampled: 8276688
    num_agent_steps_trained: 8276688
    num_steps_sampled: 8276688
    num_steps_trained: 827

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,828,131234,8276688,4.71847,12.3,-0.4,89.8378




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8286684
  custom_metrics: {}
  date: 2021-11-09_02-43-55
  done: false
  episode_len_mean: 90.60909090909091
  episode_media: {}
  episode_reward_max: 18.46999999999997
  episode_reward_mean: 5.06500000000001
  episode_reward_min: -1.5800000000000007
  episodes_this_iter: 110
  episodes_total: 89902
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.003424648443858
          entropy_coeff: 0.01
          kl: 0.012837637695826463
          policy_loss: -0.05350117004778968
          total_loss: 0.13702848939041998
          vf_explained_var: 0.9426419734954834
          vf_loss: 0.18131816193429579
    num_agent_steps_sampled: 8286684
    num_agent_steps_trained: 8286684
    num_steps_sampled: 8286684
    num_steps_trained: 82866

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,829,131393,8286684,5.065,18.47,-1.58,90.6091




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8296680
  custom_metrics: {}
  date: 2021-11-09_02-46-47
  done: false
  episode_len_mean: 90.69369369369369
  episode_media: {}
  episode_reward_max: 16.54000000000001
  episode_reward_mean: 4.751261261261271
  episode_reward_min: -1.6700000000000008
  episodes_this_iter: 111
  episodes_total: 90013
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.03239900556385
          entropy_coeff: 0.01
          kl: 0.011328914738309099
          policy_loss: -0.05779286799315586
          total_loss: 0.09977422639504711
          vf_explained_var: 0.9522592425346375
          vf_loss: 0.15208239803074772
    num_agent_steps_sampled: 8296680
    num_agent_steps_trained: 8296680
    num_steps_sampled: 8296680
    num_steps_trained: 82966

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,830,131566,8296680,4.75126,16.54,-1.67,90.6937


Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8306676
  custom_metrics: {}
  date: 2021-11-09_02-49-11
  done: false
  episode_len_mean: 91.69724770642202
  episode_media: {}
  episode_reward_max: 16.139999999999954
  episode_reward_mean: 4.733211009174321
  episode_reward_min: -1.6400000000000006
  episodes_this_iter: 109
  episodes_total: 90122
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0361287150627527
          entropy_coeff: 0.01
          kl: 0.012663154595238363
          policy_loss: -0.056102007796239646
          total_loss: 0.12427753371974597
          vf_explained_var: 0.9432310461997986
          vf_loss: 0.17189257805609806
    num_agent_steps_sampled: 8306676
    num_agent_steps_trained: 8306676
    num_steps_sampled: 8306676
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,831,131710,8306676,4.73321,16.14,-1.64,91.6972




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8316672
  custom_metrics: {}
  date: 2021-11-09_02-51-52
  done: false
  episode_len_mean: 89.26785714285714
  episode_media: {}
  episode_reward_max: 16.250000000000007
  episode_reward_mean: 4.338660714285724
  episode_reward_min: -1.2600000000000005
  episodes_this_iter: 112
  episodes_total: 90234
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.03472487580063
          entropy_coeff: 0.01
          kl: 0.011573274137000183
          policy_loss: -0.05551482806953355
          total_loss: 0.10164254566129201
          vf_explained_var: 0.9408558011054993
          vf_loss: 0.15113925820487178
    num_agent_steps_sampled: 8316672
    num_agent_steps_trained: 8316672
    num_steps_sampled: 8316672
    num_steps_trained: 8316

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,832,131871,8316672,4.33866,16.25,-1.26,89.2679




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8326668
  custom_metrics: {}
  date: 2021-11-09_02-54-46
  done: false
  episode_len_mean: 88.83928571428571
  episode_media: {}
  episode_reward_max: 12.540000000000015
  episode_reward_mean: 4.3713392857142965
  episode_reward_min: -0.9000000000000002
  episodes_this_iter: 112
  episodes_total: 90346
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0186942047542997
          entropy_coeff: 0.01
          kl: 0.011883352749364234
          policy_loss: -0.05697083067960846
          total_loss: 0.10043637668713927
          vf_explained_var: 0.9506039023399353
          vf_loss: 0.15052238470500606
    num_agent_steps_sampled: 8326668
    num_agent_steps_trained: 8326668
    num_steps_sampled: 8326668
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,833,132044,8326668,4.37134,12.54,-0.9,88.8393




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8336664
  custom_metrics: {}
  date: 2021-11-09_02-57-45
  done: false
  episode_len_mean: 88.47787610619469
  episode_media: {}
  episode_reward_max: 16.730000000000008
  episode_reward_mean: 4.751327433628328
  episode_reward_min: -0.2600000000000009
  episodes_this_iter: 113
  episodes_total: 90459
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.032829807049189
          entropy_coeff: 0.01
          kl: 0.012237973376792492
          policy_loss: -0.05616273751211727
          total_loss: 0.10196639432723069
          vf_explained_var: 0.9560261964797974
          vf_loss: 0.15057779578651245
    num_agent_steps_sampled: 8336664
    num_agent_steps_trained: 8336664
    num_steps_sampled: 8336664
    num_steps_trained: 833

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,834,132223,8336664,4.75133,16.73,-0.26,88.4779




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8346660
  custom_metrics: {}
  date: 2021-11-09_03-00-45
  done: false
  episode_len_mean: 86.19827586206897
  episode_media: {}
  episode_reward_max: 16.409999999999982
  episode_reward_mean: 5.510689655172422
  episode_reward_min: -1.4800000000000004
  episodes_this_iter: 116
  episodes_total: 90575
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 1.991789450502803
          entropy_coeff: 0.01
          kl: 0.012586459669780201
          policy_loss: -0.052989495219264784
          total_loss: 0.14960848445980213
          vf_explained_var: 0.9528253674507141
          vf_loss: 0.19384234425349114
    num_agent_steps_sampled: 8346660
    num_agent_steps_trained: 8346660
    num_steps_sampled: 8346660
    num_steps_trained: 83

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,835,132403,8346660,5.51069,16.41,-1.48,86.1983




Result for PPO_my_env_da758_00000:
  agent_timesteps_total: 8356656
  custom_metrics: {}
  date: 2021-11-09_03-03-31
  done: false
  episode_len_mean: 89.77477477477477
  episode_media: {}
  episode_reward_max: 12.700000000000015
  episode_reward_mean: 4.728738738738749
  episode_reward_min: -1.5900000000000005
  episodes_this_iter: 111
  episodes_total: 90686
  experiment_id: 80d9cec4f47b40b6adb7244052f82fbe
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 0.00010000000000000002
          entropy: 2.0134148326694454
          entropy_coeff: 0.01
          kl: 0.012314572681975198
          policy_loss: -0.05799217506622275
          total_loss: 0.11674719932608497
          vf_explained_var: 0.9464603662490845
          vf_loss: 0.16681938608391927
    num_agent_steps_sampled: 8356656
    num_agent_steps_trained: 8356656
    num_steps_sampled: 8356656
    num_steps_trained: 83

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_da758_00000,RUNNING,192.168.3.5:558908,836,132569,8356656,4.72874,12.7,-1.59,89.7748


