In [1]:
import torch 
from torch import nn

import ray
from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

#from models import VisualEncoder
from train import *
from wrappers_2 import *



In [2]:
class VisualEncoder(nn.Module):
    """Convolutional image encoder producing a flat feature vector.

    The network is six ``Conv2d(kernel_size=2, stride=2, padding=0)`` stages,
    each followed by ``ELU``, and a final ``Flatten``. Every stage halves the
    spatial resolution, so a 3x64x64 input is reduced to 512x1x1 and flattened
    to 512 features per sample.

    The layers live in ``self.cnn`` at indices 0, 2, 4, 6, 8 and 10 (the odd
    indices hold the parameter-free ELU activations), which fixes the
    state-dict key names.
    """

    def __init__(self):
        super().__init__()

        # Channel width before the first conv and after each of the six convs.
        widths = (3, 32, 32, 64, 128, 256, 512)

        stages = []
        for in_channels, out_channels in zip(widths[:-1], widths[1:]):
            stages.append(
                nn.Conv2d(in_channels, out_channels, kernel_size=2, stride=2, padding=0)
            )
            stages.append(nn.ELU())
        stages.append(nn.Flatten())

        self.cnn = nn.Sequential(*stages)

    def forward(self, x):
        """Encode a batch of channel-first images into flat feature vectors."""
        return self.cnn(x)

In [3]:
from torch.nn.functional import one_hot

class MyModelClass(TorchModelV2, nn.Module):
    """RLlib policy/value model combining a pretrained image encoder with a target-grid encoder.

    Inputs (read from ``input_dict['obs']`` in ``forward``):
        * ``'pov'``: 4-D tensor permuted from channels-last to channels-first
          and scaled by 1/255 before being fed to ``VisualEncoder``.
        * ``'target_grid'``: 4-D tensor of integer class ids in ``[0, 6]``;
          it is one-hot encoded into 7 channels and passed through a 1x1x1
          ``Conv3d`` that collapses those channels into one.

    The visual encoder is loaded from a checkpoint and is evaluated under
    ``torch.no_grad()``, so no gradients flow into it. The concatenated
    features go through an MLP trunk shared by the action-logit head and the
    value head.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        """Build the network and load the pretrained visual encoder weights.

        Args:
            obs_space: Observation space, forwarded to ``TorchModelV2``.
            action_space: Action space; ``action_space.n`` sizes the action head.
            num_outputs: Forwarded to ``TorchModelV2``.
            model_config: Forwarded to ``TorchModelV2``.
            name: Forwarded to ``TorchModelV2``.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)
        visual_features_dim = 512
        # One channel over a 9 x 11 x 11 grid after the target encoder.
        target_features_dim = 9 * 11 * 11
        self.visual_encoder = VisualEncoder()
        self.visual_encoder.load_state_dict(
            torch.load("/IGLU-Minecraft/models/AngelaCNN/encoder_weigths.pth", map_location=torch.device('cpu'))
        )
        self.target_encoder = nn.Sequential(
            nn.Conv3d(7, 1, kernel_size=1, stride=1, padding=0),
            nn.ELU(),
        )
        policy_hidden_dim = 256
        self.policy_network = nn.Sequential(
            nn.Linear(visual_features_dim + target_features_dim, 1024),
            nn.ELU(),
            nn.Linear(1024, 512),
            nn.ELU(),
            nn.Linear(512, policy_hidden_dim),
            nn.ELU(),
            nn.Linear(policy_hidden_dim, policy_hidden_dim),
            nn.ELU(),
        )
        self.action_head = nn.Linear(policy_hidden_dim, action_space.n)
        self.value_head = nn.Linear(policy_hidden_dim, 1)
        # Value estimate of the most recent forward() call; read by value_function().
        self.last_value = None

        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            self.visual_encoder.cuda()
            self.target_encoder.cuda()
            self.policy_network.cuda()
            self.action_head.cuda()
            self.value_head.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Compute action logits and cache the value estimate.

        Args:
            input_dict: Mapping whose ``'obs'`` entry holds the ``'pov'`` and
                ``'target_grid'`` tensors.
            state: Recurrent state; returned unchanged.
            seq_lens: Unused.

        Returns:
            Tuple ``(action_logits, state)``.
        """
        obs = input_dict['obs']
        # Tensor.cuda()/Tensor.to() return a new tensor rather than moving the
        # original in place, so the result has to be used. Follow the device
        # the parameters actually live on instead of assuming CUDA.
        device = self.action_head.weight.device
        pov = obs['pov'].to(device).permute(0, 3, 1, 2).float() / 255.0
        target = (
            one_hot(obs['target_grid'].to(device).long(), num_classes=7)
            .permute(0, 4, 1, 2, 3)
            .float()
        )

        # The pretrained encoder is used as a fixed feature extractor.
        with torch.no_grad():
            visual_features = self.visual_encoder(pov)

        target_features = self.target_encoder(target)
        target_features = target_features.reshape(target_features.shape[0], -1)
        features = torch.cat([visual_features, target_features], dim=1)
        features = self.policy_network(features)
        action = self.action_head(features)
        self.last_value = self.value_head(features).squeeze(1)
        return action, state

    @override(TorchModelV2)
    def value_function(self):
        """Return the value estimate cached by the most recent ``forward`` call."""
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

In [4]:
# Register the model under the name that the trainer config's "custom_model" entry refers to.
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [5]:
class VisualObservationWrapper(ObsWrapper):
    """Observation wrapper exposing ``pov``, ``inventory``, ``compass`` and optionally ``target_grid``.

    Args:
        env: Environment to wrap.
        include_target: When true, ``'target_grid'`` is declared in
            ``observation_space`` and included in every observation. When
            false, the key is omitted from both so that the returned
            observations match the declared space.
    """

    def __init__(self, env, include_target=False):
        super().__init__(env)
        self.include_target = include_target
        spaces = {
            'pov': gym.spaces.Box(low=0, high=255, shape=(64, 64, 3)),
            'inventory': gym.spaces.Box(low=0.0, high=20.0, shape=(6,)),
            'compass': gym.spaces.Box(low=-180.0, high=180.0, shape=(1,))
        }
        if include_target:
            spaces['target_grid'] = \
                gym.spaces.Box(low=0, high=6, shape=(9, 11, 11))
        self.observation_space = gym.spaces.Dict(spaces)

    def observation(self, obs, reward=None, done=None, info=None):
        """Convert a raw observation into the dict described by ``observation_space``.

        The target grid is taken from ``info['target_grid']`` when available
        (the entry is removed from ``info``); otherwise it is read from the
        current task of the unwrapped environment. If ``info`` is given but
        lacks the key, the situation is logged as an error and, when the
        unwrapped environment provides ``should_reset``, a reset is requested.

        Args:
            obs: Raw observation with ``'pov'``, ``'inventory'`` and
                ``'compass'`` (itself holding ``'angle'``) entries.
            reward: Unused.
            done: Unused.
            info: Optional step info dict; mutated as described above.

        Returns:
            Dict with ``'pov'`` (cast to float32), ``'inventory'``,
            ``'compass'`` (1-element array) and, if ``include_target`` was
            set, ``'target_grid'``.
        """
        # The lookup (and its side effects on `info`) is kept unconditional so
        # that info handling does not depend on the include_target flag.
        if info is not None:
            if 'target_grid' in info:
                target_grid = info['target_grid']
                del info['target_grid']
            else:
                logger.error(f'info: {info}')
                if hasattr(self.unwrapped, 'should_reset'):
                    self.unwrapped.should_reset(True)
                target_grid = self.env.unwrapped.tasks.current.target_grid
        else:
            target_grid = self.env.unwrapped.tasks.current.target_grid

        result = {
            'pov': obs['pov'].astype(np.float32),
            'inventory': obs['inventory'],
            'compass': np.array([obs['compass']['angle'].item()]),
        }
        # Only emit the key when the observation space declares it.
        if self.include_target:
            result['target_grid'] = target_grid
        return result

In [6]:
import os
# Enumerate GPUs in PCI bus order and expose only device 0 to this process
# (see issue #152).
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
})

class RewardWrapper(gym.RewardWrapper):
    """Reward shaping: zero rewards become -0.01 and unit rewards are divided by 10.

    Any other reward value is passed through unchanged.
    """

    def reward(self, rew):
        """Return the shaped reward for a single step."""
        if rew == 0:
            # Small per-step penalty in place of a zero reward.
            return -0.01
        return rew / 10 if abs(rew) == 1 else rew
    
def env_creator(env_config):
    """Build the wrapped 'IGLUSilentBuilder-v0' environment used for training.

    Args:
        env_config: Accepted for the env-creator call signature; not used.

    Returns:
        The environment restricted to the 'C12' task preset and wrapped, from
        innermost to outermost, by VisualObservationWrapper (with the target
        grid), SelectAndPlace, Discretization over the 'human-level' flat
        action space, and RewardWrapper.
    """
    base_env = gym.make('IGLUSilentBuilder-v0', max_steps=250)
    base_env.update_taskset(TaskSet(preset=['C12']))

    with_observations = VisualObservationWrapper(base_env, include_target=True)
    with_placement = SelectAndPlace(with_observations)
    discretized = Discretization(with_placement, flat_action_space('human-level'))
    return RewardWrapper(discretized)

from ray.tune.registry import register_env
# Register env_creator under the id that the trainer config's "env" entry refers to.
register_env("my_env", env_creator)

from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

In [None]:
from ray.tune.integration.wandb import WandbLogger

# Weights & Biases run metadata consumed by WandbLogger via "logger_config".
wandb_settings = {
    "project": "IGLU-Minecraft",
    "name": "PPO MultiTask (C12) pretrained (AngelaCNN) (3 noops after placement) r: -0.01 div10",
}

# PPO trainer configuration. "gamma" is not set here (the original cell kept
# `"gamma": 0.99` commented out).
# NOTE(review): the recorded run reports 1998 sampled steps per iteration
# rather than 1000 -- presumably because train_batch_size is not divisible by
# num_workers; confirm whether that is intended.
ppo_config = {
    "env": "my_env",
    "framework": "torch",
    "num_gpus": 1,
    "num_workers": 3,
    "sgd_minibatch_size": 256,
    "clip_param": 0.2,
    "entropy_coeff": 0.01,
    "lambda": 0.95,
    "train_batch_size": 1000,
    "model": {
        # Custom model registered with ModelCatalog.
        "custom_model": "my_torch_model",
        # Extra kwargs passed to the model constructor.
        "custom_model_config": {},
    },
    "logger_config": {"wandb": wandb_settings},
}

# Launch training; a checkpoint is written every 5 iterations and at the end,
# keeping at most 50 of them.
analysis = tune.run(
    PPOTrainer,
    config=ppo_config,
    loggers=[WandbLogger],
    local_dir="/IGLU-Minecraft/checkpoints/C12",
    keep_checkpoints_num=50,
    checkpoint_freq=5,
    checkpoint_at_end=True,
    #restore="/IGLU-Minecraft/checkpoints/4_tasks/PPO_2021-11-08_20-28-45/PPO_my_env_78cf0_00000_0_2021-11-08_20-28-45/checkpoint_000050/checkpoint-50"
)



Trial name,status,loc
PPO_my_env_c1ec9_00000,PENDING,


2021-11-13 21:29:18,085	INFO wandb.py:170 -- Already logged into W&B.
2021-11-13 21:29:19,713	ERROR syncer.py:72 -- Log sync requires rsync to be installed.
[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)
[2m[36m(pid=97382)[0m 2021-11-13 21:29:21,589	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=97382)[0m 2021-11-13 21:29:21,589	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[34m[1mwandb[0m: wandb version 0.12.6 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 1998
  custom_metrics: {}
  date: 2021-11-13_21-31-01
  done: false
  episode_len_mean: 101.22222222222223
  episode_media: {}
  episode_reward_max: 2.82
  episode_reward_mean: -0.6011111111111115
  episode_reward_min: -1.1400000000000008
  episodes_this_iter: 18
  episodes_total: 18
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.882840056646438
          entropy_coeff: 0.009999999999999998
          kl: 0.005703404819647301
          policy_loss: 0.16548543302785781
          total_loss: 0.1672038088951792
          vf_explained_var: -0.32715457677841187
          vf_loss: 0.02940609449565056
    num_agent_steps_sampled: 1998
    num_agent_steps_trained: 1998
    num_steps_sampled: 1998
    num_steps_trained: 1998
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,1,93.6059,1998,-0.601111,2.82,-1.14,101.222


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 3996
  custom_metrics: {}
  date: 2021-11-13_21-31-33
  done: false
  episode_len_mean: 99.46153846153847
  episode_media: {}
  episode_reward_max: 4.7500000000000036
  episode_reward_mean: -0.4725641025641029
  episode_reward_min: -1.1400000000000008
  episodes_this_iter: 21
  episodes_total: 39
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.878673170861744
          entropy_coeff: 0.009999999999999998
          kl: 0.005560556773412848
          policy_loss: 0.05986297102201553
          total_loss: 0.06258175199230512
          vf_explained_var: 0.003969042561948299
          vf_loss: 0.030393400350363836
    num_agent_steps_sampled: 3996
    num_agent_steps_trained: 3996
    num_steps_sampled: 3996
    num_steps_trained: 3996
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,2,126.128,3996,-0.472564,4.75,-1.14,99.4615


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 5994
  custom_metrics: {}
  date: 2021-11-13_21-32-06
  done: false
  episode_len_mean: 99.15
  episode_media: {}
  episode_reward_max: 4.7500000000000036
  episode_reward_mean: -0.597166666666667
  episode_reward_min: -1.1400000000000008
  episodes_this_iter: 21
  episodes_total: 60
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.869972423144749
          entropy_coeff: 0.009999999999999998
          kl: 0.010543064373076068
          policy_loss: -0.035259030706116135
          total_loss: -0.006898064184046927
          vf_explained_var: 0.19810587167739868
          vf_loss: 0.054952078466747135
    num_agent_steps_sampled: 5994
    num_agent_steps_trained: 5994
    num_steps_sampled: 5994
    num_steps_trained: 5994
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,3,158.638,5994,-0.597167,4.75,-1.14,99.15


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 7992
  custom_metrics: {}
  date: 2021-11-13_21-32-40
  done: false
  episode_len_mean: 99.3974358974359
  episode_media: {}
  episode_reward_max: 6.380000000000006
  episode_reward_mean: 0.09307692307692357
  episode_reward_min: -1.1400000000000008
  episodes_this_iter: 18
  episodes_total: 78
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.8304984501429966
          entropy_coeff: 0.009999999999999998
          kl: 0.009921468008821436
          policy_loss: -0.0056156604506430174
          total_loss: 0.520181911048435
          vf_explained_var: 0.32595294713974
          vf_loss: 0.552118263854867
    num_agent_steps_sampled: 7992
    num_agent_steps_trained: 7992
    num_steps_sampled: 7992
    num_steps_trained: 7992
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,4,192.432,7992,0.0930769,6.38,-1.14,99.3974


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 9990
  custom_metrics: {}
  date: 2021-11-13_21-33-06
  done: false
  episode_len_mean: 98.52
  episode_media: {}
  episode_reward_max: 6.590000000000008
  episode_reward_mean: 0.6711000000000013
  episode_reward_min: -1.1400000000000008
  episodes_this_iter: 22
  episodes_total: 100
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.811806241671244
          entropy_coeff: 0.009999999999999998
          kl: 0.011438386647242367
          policy_loss: -0.01165941542103177
          total_loss: 0.4895619119916643
          vf_explained_var: 0.5742881298065186
          vf_loss: 0.5270517118629955
    num_agent_steps_sampled: 9990
    num_agent_steps_trained: 9990
    num_steps_sampled: 9990
    num_steps_trained: 9990
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,5,218.483,9990,0.6711,6.59,-1.14,98.52


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 11988
  custom_metrics: {}
  date: 2021-11-13_21-33-30
  done: false
  episode_len_mean: 98.09
  episode_media: {}
  episode_reward_max: 6.590000000000008
  episode_reward_mean: 1.3642000000000025
  episode_reward_min: -1.1200000000000008
  episodes_this_iter: 20
  episodes_total: 120
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.7578937905175347
          entropy_coeff: 0.009999999999999998
          kl: 0.01130056646144069
          policy_loss: -0.013157294335819426
          total_loss: 0.4043581335140126
          vf_explained_var: 0.6490393280982971
          vf_loss: 0.4428342506289482
    num_agent_steps_sampled: 11988
    num_agent_steps_trained: 11988
    num_steps_sampled: 11988
    num_steps_trained: 11988
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,6,243.077,11988,1.3642,6.59,-1.12,98.09


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 13986
  custom_metrics: {}
  date: 2021-11-13_21-33-56
  done: false
  episode_len_mean: 97.46
  episode_media: {}
  episode_reward_max: 6.79000000000001
  episode_reward_mean: 2.3392000000000044
  episode_reward_min: -1.1200000000000008
  episodes_this_iter: 21
  episodes_total: 141
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.706659454391116
          entropy_coeff: 0.009999999999999998
          kl: 0.014662390953454209
          policy_loss: -0.026703608319872903
          total_loss: 0.46968294225987933
          vf_explained_var: 0.6787059903144836
          vf_loss: 0.5205206707119941
    num_agent_steps_sampled: 13986
    num_agent_steps_trained: 13986
    num_steps_sampled: 13986
    num_steps_trained: 13986
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,7,269.033,13986,2.3392,6.79,-1.12,97.46


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 15984
  custom_metrics: {}
  date: 2021-11-13_21-34-23
  done: false
  episode_len_mean: 96.43
  episode_media: {}
  episode_reward_max: 6.990000000000009
  episode_reward_mean: 3.303100000000007
  episode_reward_min: -1.1200000000000008
  episodes_this_iter: 22
  episodes_total: 163
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.679888422148568
          entropy_coeff: 0.009999999999999998
          kl: 0.010000597339444027
          policy_loss: 0.014961325164352144
          total_loss: 0.4654313227250462
          vf_explained_var: 0.7398209571838379
          vf_loss: 0.4752687643681254
    num_agent_steps_sampled: 15984
    num_agent_steps_trained: 15984
    num_steps_sampled: 15984
    num_steps_trained: 15984
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,8,295.847,15984,3.3031,6.99,-1.12,96.43


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 17982
  custom_metrics: {}
  date: 2021-11-13_21-34-49
  done: false
  episode_len_mean: 95.68
  episode_media: {}
  episode_reward_max: 6.990000000000009
  episode_reward_mean: 3.7790000000000084
  episode_reward_min: -0.9500000000000006
  episodes_this_iter: 21
  episodes_total: 184
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.674108133997236
          entropy_coeff: 0.009999999999999998
          kl: 0.010272174737145045
          policy_loss: 0.005929579514832724
          total_loss: 0.40065486207604406
          vf_explained_var: 0.8071668148040771
          vf_loss: 0.41941193079664596
    num_agent_steps_sampled: 17982
    num_agent_steps_trained: 17982
    num_steps_sampled: 17982
    num_steps_trained: 17982
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,9,321.304,17982,3.779,6.99,-0.95,95.68


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 19980
  custom_metrics: {}
  date: 2021-11-13_21-35-16
  done: false
  episode_len_mean: 94.93
  episode_media: {}
  episode_reward_max: 6.990000000000009
  episode_reward_mean: 4.03010000000001
  episode_reward_min: 0.7399999999999993
  episodes_this_iter: 22
  episodes_total: 206
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.6297016881761097
          entropy_coeff: 0.009999999999999998
          kl: 0.009827202918365996
          policy_loss: -0.016747807427531198
          total_loss: 0.4128625229710624
          vf_explained_var: 0.8076090216636658
          vf_loss: 0.45394190968502135
    num_agent_steps_sampled: 19980
    num_agent_steps_trained: 19980
    num_steps_sampled: 19980
    num_steps_trained: 19980
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,10,348.132,19980,4.0301,6.99,0.74,94.93


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 21978
  custom_metrics: {}
  date: 2021-11-13_21-35-42
  done: false
  episode_len_mean: 94.08
  episode_media: {}
  episode_reward_max: 6.990000000000009
  episode_reward_mean: 4.359100000000011
  episode_reward_min: 0.9600000000000022
  episodes_this_iter: 21
  episodes_total: 227
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.566334101131984
          entropy_coeff: 0.009999999999999998
          kl: 0.009784010922343956
          policy_loss: -0.008979354550441106
          total_loss: 0.47313478894176936
          vf_explained_var: 0.8128864169120789
          vf_loss: 0.5058206854122026
    num_agent_steps_sampled: 21978
    num_agent_steps_trained: 21978
    num_steps_sampled: 21978
    num_steps_trained: 21978
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,11,374.677,21978,4.3591,6.99,0.96,94.08


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 23976
  custom_metrics: {}
  date: 2021-11-13_21-36-09
  done: false
  episode_len_mean: 93.97
  episode_media: {}
  episode_reward_max: 6.990000000000009
  episode_reward_mean: 4.528600000000012
  episode_reward_min: 0.9600000000000022
  episodes_this_iter: 21
  episodes_total: 248
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.5478057759148736
          entropy_coeff: 0.009999999999999998
          kl: 0.009920251516571084
          policy_loss: -0.015570498594925517
          total_loss: 0.4577688075247265
          vf_explained_var: 0.8042890429496765
          vf_loss: 0.4968333112342017
    num_agent_steps_sampled: 23976
    num_agent_steps_trained: 23976
    num_steps_sampled: 23976
    num_steps_trained: 23976
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,12,401.695,23976,4.5286,6.99,0.96,93.97


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 25974
  custom_metrics: {}
  date: 2021-11-13_21-36-35
  done: false
  episode_len_mean: 94.25
  episode_media: {}
  episode_reward_max: 6.760000000000004
  episode_reward_mean: 4.703100000000013
  episode_reward_min: 0.9600000000000022
  episodes_this_iter: 20
  episodes_total: 268
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.519694992474147
          entropy_coeff: 0.009999999999999998
          kl: 0.009893422050303757
          policy_loss: -0.03157525990335714
          total_loss: 0.5100091284584432
          vf_explained_var: 0.7804846167564392
          vf_loss: 0.5648026558614913
    num_agent_steps_sampled: 25974
    num_agent_steps_trained: 25974
    num_steps_sampled: 25974
    num_steps_trained: 25974
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,13,427.67,25974,4.7031,6.76,0.96,94.25


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 27972
  custom_metrics: {}
  date: 2021-11-13_21-36-58
  done: false
  episode_len_mean: 95.76
  episode_media: {}
  episode_reward_max: 6.760000000000004
  episode_reward_mean: 4.669800000000013
  episode_reward_min: 0.9600000000000022
  episodes_this_iter: 20
  episodes_total: 288
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.5486324707667034
          entropy_coeff: 0.009999999999999998
          kl: 0.008276939229526565
          policy_loss: -0.02077431029507092
          total_loss: 0.4885985729062841
          vf_explained_var: 0.810821533203125
          vf_loss: 0.53320381911028
    num_agent_steps_sampled: 27972
    num_agent_steps_trained: 27972
    num_steps_sampled: 27972
    num_steps_trained: 27972
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,14,450.442,27972,4.6698,6.76,0.96,95.76


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 29970
  custom_metrics: {}
  date: 2021-11-13_21-37-21
  done: false
  episode_len_mean: 97.08
  episode_media: {}
  episode_reward_max: 12.360000000000015
  episode_reward_mean: 5.042300000000014
  episode_reward_min: 2.2300000000000098
  episodes_this_iter: 21
  episodes_total: 309
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.5178912457965668
          entropy_coeff: 0.009999999999999998
          kl: 0.008670977281085455
          policy_loss: -0.010298847176489375
          total_loss: 0.522636164582911
          vf_explained_var: 0.7854509353637695
          vf_loss: 0.5563797234069734
    num_agent_steps_sampled: 29970
    num_agent_steps_trained: 29970
    num_steps_sampled: 29970
    num_steps_trained: 29970
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,15,473.828,29970,5.0423,12.36,2.23,97.08


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 31968
  custom_metrics: {}
  date: 2021-11-13_21-37-44
  done: false
  episode_len_mean: 99.11
  episode_media: {}
  episode_reward_max: 12.360000000000015
  episode_reward_mean: 5.047900000000015
  episode_reward_min: 2.2700000000000156
  episodes_this_iter: 19
  episodes_total: 328
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.480460218020848
          entropy_coeff: 0.009999999999999998
          kl: 0.007375718710702233
          policy_loss: -0.03663729881601674
          total_loss: 0.3109915445957865
          vf_explained_var: 0.8462395668029785
          vf_loss: 0.3709583030570121
    num_agent_steps_sampled: 31968
    num_agent_steps_trained: 31968
    num_steps_sampled: 31968
    num_steps_trained: 31968
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,16,496.636,31968,5.0479,12.36,2.27,99.11


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 33966
  custom_metrics: {}
  date: 2021-11-13_21-38-07
  done: false
  episode_len_mean: 99.95
  episode_media: {}
  episode_reward_max: 12.360000000000015
  episode_reward_mean: 5.176400000000016
  episode_reward_min: 2.1800000000000117
  episodes_this_iter: 18
  episodes_total: 346
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.4604553813026064
          entropy_coeff: 0.009999999999999998
          kl: 0.010763841822640185
          policy_loss: -0.006218980447877021
          total_loss: 0.4575322930301939
          vf_explained_var: 0.8304415941238403
          vf_loss: 0.48620306281816394
    num_agent_steps_sampled: 33966
    num_agent_steps_trained: 33966
    num_steps_sampled: 33966
    num_steps_trained: 33966
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,17,519.805,33966,5.1764,12.36,2.18,99.95




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 35964
  custom_metrics: {}
  date: 2021-11-13_21-38-52
  done: false
  episode_len_mean: 99.53
  episode_media: {}
  episode_reward_max: 12.360000000000015
  episode_reward_mean: 5.022000000000016
  episode_reward_min: -0.15
  episodes_this_iter: 24
  episodes_total: 370
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.4607850494838894
          entropy_coeff: 0.009999999999999998
          kl: 0.008753717701278054
          policy_loss: -0.011789269603434063
          total_loss: 0.5987747496082669
          vf_explained_var: 0.7473449110984802
          vf_loss: 0.6334211294140134
    num_agent_steps_sampled: 35964
    num_agent_steps_trained: 35964
    num_steps_sampled: 35964
    num_steps_trained: 35964
  iterations_since_restore: 18
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,18,564.42,35964,5.022,12.36,-0.15,99.53


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 37962
  custom_metrics: {}
  date: 2021-11-13_21-39-17
  done: false
  episode_len_mean: 99.54
  episode_media: {}
  episode_reward_max: 12.360000000000015
  episode_reward_mean: 5.205400000000017
  episode_reward_min: -0.15
  episodes_this_iter: 18
  episodes_total: 388
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.4415252560660954
          entropy_coeff: 0.009999999999999998
          kl: 0.0119952764801665
          policy_loss: -0.027045181500060217
          total_loss: 0.40284756019356704
          vf_explained_var: 0.8170909881591797
          vf_loss: 0.4519089400058701
    num_agent_steps_sampled: 37962
    num_agent_steps_trained: 37962
    num_steps_sampled: 37962
    num_steps_trained: 37962
  iterations_since_restore: 19
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,19,589.157,37962,5.2054,12.36,-0.15,99.54


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 39960
  custom_metrics: {}
  date: 2021-11-13_21-39-42
  done: false
  episode_len_mean: 99.87
  episode_media: {}
  episode_reward_max: 10.230000000000018
  episode_reward_mean: 5.151900000000017
  episode_reward_min: -0.15
  episodes_this_iter: 20
  episodes_total: 408
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.4050423769723803
          entropy_coeff: 0.009999999999999998
          kl: 0.009994801099675989
          policy_loss: 0.00885747651613894
          total_loss: 0.35337225826723234
          vf_explained_var: 0.852358341217041
          vf_loss: 0.3665662453997703
    num_agent_steps_sampled: 39960
    num_agent_steps_trained: 39960
    num_steps_sampled: 39960
    num_steps_trained: 39960
  iterations_since_restore: 20
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,20,613.787,39960,5.1519,10.23,-0.15,99.87


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 41958
  custom_metrics: {}
  date: 2021-11-13_21-40-06
  done: false
  episode_len_mean: 99.38
  episode_media: {}
  episode_reward_max: 10.230000000000018
  episode_reward_mean: 5.235600000000017
  episode_reward_min: -0.15
  episodes_this_iter: 20
  episodes_total: 428
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.3866292646953036
          entropy_coeff: 0.009999999999999998
          kl: 0.010569713830983117
          policy_loss: -0.066588300448798
          total_loss: 0.28426548191124484
          vf_explained_var: 0.8414488434791565
          vf_loss: 0.3726061315763564
    num_agent_steps_sampled: 41958
    num_agent_steps_trained: 41958
    num_steps_sampled: 41958
    num_steps_trained: 41958
  iterations_since_restore: 21
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,21,638.371,41958,5.2356,10.23,-0.15,99.38


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 43956
  custom_metrics: {}
  date: 2021-11-13_21-40-30
  done: false
  episode_len_mean: 99.55
  episode_media: {}
  episode_reward_max: 10.230000000000018
  episode_reward_mean: 5.1689000000000185
  episode_reward_min: -0.15
  episodes_this_iter: 21
  episodes_total: 449
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.3594072580337526
          entropy_coeff: 0.009999999999999998
          kl: 0.012280513917090057
          policy_loss: -0.009734727690617244
          total_loss: 0.3554318061009759
          vf_explained_var: 0.8700714111328125
          vf_loss: 0.3863044994927588
    num_agent_steps_sampled: 43956
    num_agent_steps_trained: 43956
    num_steps_sampled: 43956
    num_steps_trained: 43956
  iterations_since_restore: 22
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,22,662.365,43956,5.1689,10.23,-0.15,99.55


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 45954
  custom_metrics: {}
  date: 2021-11-13_21-40-53
  done: false
  episode_len_mean: 100.8
  episode_media: {}
  episode_reward_max: 10.230000000000018
  episode_reward_mean: 5.4810000000000185
  episode_reward_min: 2.1200000000000188
  episodes_this_iter: 18
  episodes_total: 467
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.326967293875558
          entropy_coeff: 0.009999999999999998
          kl: 0.009912723590650336
          policy_loss: -0.044150541598598166
          total_loss: 0.36696852433184785
          vf_explained_var: 0.8323751091957092
          vf_loss: 0.4324061919535909
    num_agent_steps_sampled: 45954
    num_agent_steps_trained: 45954
    num_steps_sampled: 45954
    num_steps_trained: 45954
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,23,685.252,45954,5.481,10.23,2.12,100.8


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 47952
  custom_metrics: {}
  date: 2021-11-13_21-41-16
  done: false
  episode_len_mean: 100.21
  episode_media: {}
  episode_reward_max: 10.370000000000017
  episode_reward_mean: 5.572500000000019
  episode_reward_min: 2.4600000000000155
  episodes_this_iter: 21
  episodes_total: 488
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.3350787196840557
          entropy_coeff: 0.009999999999999998
          kl: 0.013640002317765498
          policy_loss: -0.05110659074215662
          total_loss: 0.36989814889218126
          vf_explained_var: 0.8353806138038635
          vf_loss: 0.44162752699284324
    num_agent_steps_sampled: 47952
    num_agent_steps_trained: 47952
    num_steps_sampled: 47952
    num_steps_trained: 47952
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,24,708.481,47952,5.5725,10.37,2.46,100.21


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 49950
  custom_metrics: {}
  date: 2021-11-13_21-41-39
  done: false
  episode_len_mean: 100.59
  episode_media: {}
  episode_reward_max: 10.370000000000017
  episode_reward_mean: 5.667500000000018
  episode_reward_min: 2.4600000000000155
  episodes_this_iter: 19
  episodes_total: 507
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.356444097700573
          entropy_coeff: 0.009999999999999998
          kl: 0.010378470414637539
          policy_loss: -0.06722512328553767
          total_loss: 0.30292990761143823
          vf_explained_var: 0.8523162603378296
          vf_loss: 0.3916437790507362
    num_agent_steps_sampled: 49950
    num_agent_steps_trained: 49950
    num_steps_sampled: 49950
    num_steps_trained: 49950
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,25,731.531,49950,5.6675,10.37,2.46,100.59


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 51948
  custom_metrics: {}
  date: 2021-11-13_21-42-03
  done: false
  episode_len_mean: 100.32
  episode_media: {}
  episode_reward_max: 10.370000000000017
  episode_reward_mean: 5.709200000000019
  episode_reward_min: 2.4600000000000155
  episodes_this_iter: 20
  episodes_total: 527
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.22615560009366
          entropy_coeff: 0.009999999999999998
          kl: 0.010876461193898226
          policy_loss: 0.03580805379010382
          total_loss: 0.329451625226509
          vf_explained_var: 0.8735321164131165
          vf_loss: 0.3137298315763474
    num_agent_steps_sampled: 51948
    num_agent_steps_trained: 51948
    num_steps_sampled: 51948
    num_steps_trained: 51948
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,26,755.368,51948,5.7092,10.37,2.46,100.32


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 53946
  custom_metrics: {}
  date: 2021-11-13_21-42-27
  done: false
  episode_len_mean: 100.42
  episode_media: {}
  episode_reward_max: 10.370000000000017
  episode_reward_mean: 5.92750000000002
  episode_reward_min: 2.6400000000000183
  episodes_this_iter: 21
  episodes_total: 548
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.287757693018232
          entropy_coeff: 0.009999999999999998
          kl: 0.012124861658591116
          policy_loss: -0.047380413221461436
          total_loss: 0.39520905942079565
          vf_explained_var: 0.8263121247291565
          vf_loss: 0.4630420730937095
    num_agent_steps_sampled: 53946
    num_agent_steps_trained: 53946
    num_steps_sampled: 53946
    num_steps_trained: 53946
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,27,778.9,53946,5.9275,10.37,2.64,100.42


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 55944
  custom_metrics: {}
  date: 2021-11-13_21-42-50
  done: false
  episode_len_mean: 100.9
  episode_media: {}
  episode_reward_max: 10.370000000000017
  episode_reward_mean: 5.85860000000002
  episode_reward_min: 2.6400000000000183
  episodes_this_iter: 19
  episodes_total: 567
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.2799507504417784
          entropy_coeff: 0.009999999999999998
          kl: 0.009121437369209756
          policy_loss: 0.00758086166211537
          total_loss: 0.2887851867647398
          vf_explained_var: 0.883094072341919
          vf_loss: 0.30217954367399213
    num_agent_steps_sampled: 55944
    num_agent_steps_trained: 55944
    num_steps_sampled: 55944
    num_steps_trained: 55944
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,28,801.668,55944,5.8586,10.37,2.64,100.9


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 57942
  custom_metrics: {}
  date: 2021-11-13_21-43-12
  done: false
  episode_len_mean: 101.66
  episode_media: {}
  episode_reward_max: 10.180000000000017
  episode_reward_mean: 5.797500000000021
  episode_reward_min: 2.6400000000000183
  episodes_this_iter: 19
  episodes_total: 586
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.253344862801688
          entropy_coeff: 0.009999999999999998
          kl: 0.01051790001270669
          policy_loss: -0.022880421915934198
          total_loss: 0.390043277861107
          vf_explained_var: 0.8290706276893616
          vf_loss: 0.43335356797490804
    num_agent_steps_sampled: 57942
    num_agent_steps_trained: 57942
    num_steps_sampled: 57942
    num_steps_trained: 57942
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,29,824.409,57942,5.7975,10.18,2.64,101.66


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 59940
  custom_metrics: {}
  date: 2021-11-13_21-43-36
  done: false
  episode_len_mean: 101.19
  episode_media: {}
  episode_reward_max: 14.35000000000002
  episode_reward_mean: 5.97190000000002
  episode_reward_min: 2.6400000000000183
  episodes_this_iter: 20
  episodes_total: 606
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.206914116087414
          entropy_coeff: 0.009999999999999998
          kl: 0.012791359058644524
          policy_loss: 0.010566581005141849
          total_loss: 0.45577174350619315
          vf_explained_var: 0.853949248790741
          vf_loss: 0.46471603001867023
    num_agent_steps_sampled: 59940
    num_agent_steps_trained: 59940
    num_steps_sampled: 59940
    num_steps_trained: 59940
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,30,847.839,59940,5.9719,14.35,2.64,101.19


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 61938
  custom_metrics: {}
  date: 2021-11-13_21-43-59
  done: false
  episode_len_mean: 102.18
  episode_media: {}
  episode_reward_max: 14.35000000000002
  episode_reward_mean: 6.2725000000000195
  episode_reward_min: 3.3900000000000245
  episodes_this_iter: 18
  episodes_total: 624
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.223039882523673
          entropy_coeff: 0.009999999999999998
          kl: 0.013395267183018063
          policy_loss: -0.05143552449132715
          total_loss: 0.3994637977154482
          vf_explained_var: 0.8367322683334351
          vf_loss: 0.4704506659791583
    num_agent_steps_sampled: 61938
    num_agent_steps_trained: 61938
    num_steps_sampled: 61938
    num_steps_trained: 61938
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,31,870.901,61938,6.2725,14.35,3.39,102.18


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 63936
  custom_metrics: {}
  date: 2021-11-13_21-44-23
  done: false
  episode_len_mean: 103.38
  episode_media: {}
  episode_reward_max: 14.35000000000002
  episode_reward_mean: 6.302900000000019
  episode_reward_min: 3.3900000000000245
  episodes_this_iter: 20
  episodes_total: 644
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.165848616191319
          entropy_coeff: 0.009999999999999998
          kl: 0.013245639300085151
          policy_loss: -0.022923911096794265
          total_loss: 0.4228241551135268
          vf_explained_var: 0.8276990652084351
          vf_loss: 0.464757424451056
    num_agent_steps_sampled: 63936
    num_agent_steps_trained: 63936
    num_steps_sampled: 63936
    num_steps_trained: 63936
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,32,895.17,63936,6.3029,14.35,3.39,103.38


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 65934
  custom_metrics: {}
  date: 2021-11-13_21-44-47
  done: false
  episode_len_mean: 102.74
  episode_media: {}
  episode_reward_max: 14.35000000000002
  episode_reward_mean: 6.522700000000019
  episode_reward_min: 3.3900000000000245
  episodes_this_iter: 20
  episodes_total: 664
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.1413143441790625
          entropy_coeff: 0.009999999999999998
          kl: 0.012360721057669763
          policy_loss: -0.06393946464217845
          total_loss: 0.20303927895923454
          vf_explained_var: 0.917038083076477
          vf_loss: 0.2859197423571632
    num_agent_steps_sampled: 65934
    num_agent_steps_trained: 65934
    num_steps_sampled: 65934
    num_steps_trained: 65934
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,33,918.586,65934,6.5227,14.35,3.39,102.74


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 67932
  custom_metrics: {}
  date: 2021-11-13_21-45-10
  done: false
  episode_len_mean: 102.13
  episode_media: {}
  episode_reward_max: 14.35000000000002
  episode_reward_mean: 6.683100000000019
  episode_reward_min: 3.44000000000001
  episodes_this_iter: 21
  episodes_total: 685
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.215674268631708
          entropy_coeff: 0.009999999999999998
          kl: 0.012666681914419472
          policy_loss: -0.05070006031365622
          total_loss: 0.3505895922936144
          vf_explained_var: 0.868750274181366
          vf_loss: 0.42091305561008907
    num_agent_steps_sampled: 67932
    num_agent_steps_trained: 67932
    num_steps_sampled: 67932
    num_steps_trained: 67932
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,34,942.265,67932,6.6831,14.35,3.44,102.13




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 69930
  custom_metrics: {}
  date: 2021-11-13_21-45-50
  done: false
  episode_len_mean: 100.94
  episode_media: {}
  episode_reward_max: 10.52000000000002
  episode_reward_mean: 6.621500000000018
  episode_reward_min: -0.07
  episodes_this_iter: 19
  episodes_total: 704
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.2597378208523704
          entropy_coeff: 0.009999999999999998
          kl: 0.014719511475148222
          policy_loss: -0.010406661601293655
          total_loss: 0.47844268935067313
          vf_explained_var: 0.8472881317138672
          vf_loss: 0.5085028210920947
    num_agent_steps_sampled: 69930
    num_agent_steps_trained: 69930
    num_steps_sampled: 69930
    num_steps_trained: 69930
  iterations_since_restore: 35
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,35,982.04,69930,6.6215,10.52,-0.07,100.94




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 71928
  custom_metrics: {}
  date: 2021-11-13_21-46-46
  done: false
  episode_len_mean: 98.6
  episode_media: {}
  episode_reward_max: 13.95000000000002
  episode_reward_mean: 6.69190000000002
  episode_reward_min: -0.07
  episodes_this_iter: 22
  episodes_total: 726
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.1538316896983556
          entropy_coeff: 0.009999999999999998
          kl: 0.013106778795015036
          policy_loss: -0.04740572777532396
          total_loss: 0.6014426598236674
          vf_explained_var: 0.838639497756958
          vf_loss: 0.6677653463113875
    num_agent_steps_sampled: 71928
    num_agent_steps_trained: 71928
    num_steps_sampled: 71928
    num_steps_trained: 71928
  iterations_since_restore: 36
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,36,1038.01,71928,6.6919,13.95,-0.07,98.6


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 73926
  custom_metrics: {}
  date: 2021-11-13_21-47-11
  done: false
  episode_len_mean: 98.51
  episode_media: {}
  episode_reward_max: 13.95000000000002
  episode_reward_mean: 7.125400000000019
  episode_reward_min: -0.07
  episodes_this_iter: 21
  episodes_total: 747
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.1714624155135382
          entropy_coeff: 0.009999999999999998
          kl: 0.01239845241823041
          policy_loss: -0.019307865025032133
          total_loss: 0.428766608415615
          vf_explained_var: 0.8756886720657349
          vf_loss: 0.4673094034194946
    num_agent_steps_sampled: 73926
    num_agent_steps_trained: 73926
    num_steps_sampled: 73926
    num_steps_trained: 73926
  iterations_since_restore: 37
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,37,1062.36,73926,7.1254,13.95,-0.07,98.51


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 75924
  custom_metrics: {}
  date: 2021-11-13_21-47-34
  done: false
  episode_len_mean: 98.66
  episode_media: {}
  episode_reward_max: 14.46000000000002
  episode_reward_mean: 7.308200000000019
  episode_reward_min: -0.07
  episodes_this_iter: 18
  episodes_total: 765
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.1315475480897086
          entropy_coeff: 0.009999999999999998
          kl: 0.015857734824745735
          policy_loss: -0.028750535153916904
          total_loss: 0.5389289518197378
          vf_explained_var: 0.8449329733848572
          vf_loss: 0.5858234115299724
    num_agent_steps_sampled: 75924
    num_agent_steps_trained: 75924
    num_steps_sampled: 75924
    num_steps_trained: 75924
  iterations_since_restore: 38
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,38,1086,75924,7.3082,14.46,-0.07,98.66


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 77922
  custom_metrics: {}
  date: 2021-11-13_21-47-59
  done: false
  episode_len_mean: 97.63
  episode_media: {}
  episode_reward_max: 14.46000000000002
  episode_reward_mean: 7.721300000000018
  episode_reward_min: -0.07
  episodes_this_iter: 21
  episodes_total: 786
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.064345473902566
          entropy_coeff: 0.009999999999999998
          kl: 0.011214653417578825
          policy_loss: -0.04236576690205506
          total_loss: 0.47134813361224676
          vf_explained_var: 0.8747237324714661
          vf_loss: 0.5321144228889829
    num_agent_steps_sampled: 77922
    num_agent_steps_trained: 77922
    num_steps_sampled: 77922
    num_steps_trained: 77922
  iterations_since_restore: 39
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,39,1110.99,77922,7.7213,14.46,-0.07,97.63


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 79920
  custom_metrics: {}
  date: 2021-11-13_21-48-23
  done: false
  episode_len_mean: 97.53
  episode_media: {}
  episode_reward_max: 16.460000000000015
  episode_reward_mean: 8.061300000000019
  episode_reward_min: -0.05
  episodes_this_iter: 21
  episodes_total: 807
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.0811310535385497
          entropy_coeff: 0.009999999999999998
          kl: 0.013505505481622113
          policy_loss: 0.008054204568976448
          total_loss: 0.5953687170431727
          vf_explained_var: 0.8839794397354126
          vf_loss: 0.6054247186297462
    num_agent_steps_sampled: 79920
    num_agent_steps_trained: 79920
    num_steps_sampled: 79920
    num_steps_trained: 79920
  iterations_since_restore: 40
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,40,1134.64,79920,8.0613,16.46,-0.05,97.53


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 81918
  custom_metrics: {}
  date: 2021-11-13_21-48-46
  done: false
  episode_len_mean: 98.77
  episode_media: {}
  episode_reward_max: 16.460000000000015
  episode_reward_mean: 8.510200000000019
  episode_reward_min: 3.9500000000000224
  episodes_this_iter: 20
  episodes_total: 827
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.1464827685129073
          entropy_coeff: 0.009999999999999998
          kl: 0.010095882892295125
          policy_loss: -0.010805136302397364
          total_loss: 0.5121090232793774
          vf_explained_var: 0.8780866861343384
          vf_loss: 0.5423598075196856
    num_agent_steps_sampled: 81918
    num_agent_steps_trained: 81918
    num_steps_sampled: 81918
    num_steps_trained: 81918
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,41,1157.45,81918,8.5102,16.46,3.95,98.77


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 83916
  custom_metrics: {}
  date: 2021-11-13_21-49-10
  done: false
  episode_len_mean: 98.72
  episode_media: {}
  episode_reward_max: 16.460000000000015
  episode_reward_mean: 8.570000000000018
  episode_reward_min: 3.9500000000000224
  episodes_this_iter: 20
  episodes_total: 847
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.0686467778115047
          entropy_coeff: 0.009999999999999998
          kl: 0.01022427188789296
          policy_loss: -0.013151278187121664
          total_loss: 0.4305997407862118
          vf_explained_var: 0.915442168712616
          vf_loss: 0.4623926331599553
    num_agent_steps_sampled: 83916
    num_agent_steps_trained: 83916
    num_steps_sampled: 83916
    num_steps_trained: 83916
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,42,1181.13,83916,8.57,16.46,3.95,98.72


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 85914
  custom_metrics: {}
  date: 2021-11-13_21-49-34
  done: false
  episode_len_mean: 98.28
  episode_media: {}
  episode_reward_max: 16.460000000000015
  episode_reward_mean: 9.079700000000019
  episode_reward_min: 4.630000000000015
  episodes_this_iter: 20
  episodes_total: 867
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.048621946857089
          entropy_coeff: 0.009999999999999998
          kl: 0.012956297944678986
          policy_loss: -0.028654830441588446
          total_loss: 0.4890399945101568
          vf_explained_var: 0.9115848541259766
          vf_loss: 0.5355897863705953
    num_agent_steps_sampled: 85914
    num_agent_steps_trained: 85914
    num_steps_sampled: 85914
    num_steps_trained: 85914
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,43,1205.24,85914,9.0797,16.46,4.63,98.28


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 87912
  custom_metrics: {}
  date: 2021-11-13_21-49-58
  done: false
  episode_len_mean: 99.34
  episode_media: {}
  episode_reward_max: 16.460000000000015
  episode_reward_mean: 9.188500000000019
  episode_reward_min: 4.630000000000015
  episodes_this_iter: 20
  episodes_total: 887
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 2.0166887419564383
          entropy_coeff: 0.009999999999999998
          kl: 0.012632427444844435
          policy_loss: -0.034545144687096276
          total_loss: 0.4466810532269024
          vf_explained_var: 0.9166868925094604
          vf_loss: 0.4988665967470124
    num_agent_steps_sampled: 87912
    num_agent_steps_trained: 87912
    num_steps_sampled: 87912
    num_steps_trained: 87912
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,44,1229.48,87912,9.1885,16.46,4.63,99.34


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 89910
  custom_metrics: {}
  date: 2021-11-13_21-50-22
  done: false
  episode_len_mean: 99.79
  episode_media: {}
  episode_reward_max: 14.960000000000015
  episode_reward_mean: 9.471100000000018
  episode_reward_min: 4.630000000000015
  episodes_this_iter: 20
  episodes_total: 907
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 1.9679873914945694
          entropy_coeff: 0.009999999999999998
          kl: 0.014778833888478834
          policy_loss: -0.09830938027728171
          total_loss: 0.44313295801125824
          vf_explained_var: 0.9086946249008179
          vf_loss: 0.5581664445144789
    num_agent_steps_sampled: 89910
    num_agent_steps_trained: 89910
    num_steps_sampled: 89910
    num_steps_trained: 89910
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,45,1253.26,89910,9.4711,14.96,4.63,99.79


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 91908
  custom_metrics: {}
  date: 2021-11-13_21-50-45
  done: false
  episode_len_mean: 100.62
  episode_media: {}
  episode_reward_max: 14.960000000000015
  episode_reward_mean: 9.610800000000019
  episode_reward_min: 0.7499999999999991
  episodes_this_iter: 19
  episodes_total: 926
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 1.9951680949756077
          entropy_coeff: 0.009999999999999998
          kl: 0.015030189551314356
          policy_loss: -0.04139558186843282
          total_loss: 0.3450906290096186
          vf_explained_var: 0.9335429668426514
          vf_loss: 0.403431856348401
    num_agent_steps_sampled: 91908
    num_agent_steps_trained: 91908
    num_steps_sampled: 91908
    num_steps_trained: 91908
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,46,1276.86,91908,9.6108,14.96,0.75,100.62


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 93906
  custom_metrics: {}
  date: 2021-11-13_21-51-09
  done: false
  episode_len_mean: 100.92
  episode_media: {}
  episode_reward_max: 14.960000000000015
  episode_reward_mean: 9.81280000000002
  episode_reward_min: 0.7499999999999991
  episodes_this_iter: 20
  episodes_total: 946
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2
          cur_lr: 5.000000000000001e-05
          entropy: 1.9446950554847717
          entropy_coeff: 0.009999999999999998
          kl: 0.027832933045832352
          policy_loss: -0.003910987540369942
          total_loss: 0.4592363591261563
          vf_explained_var: 0.931538999080658
          vf_loss: 0.4770277087177549
    num_agent_steps_sampled: 93906
    num_agent_steps_trained: 93906
    num_steps_sampled: 93906
    num_steps_trained: 93906
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,47,1300.53,93906,9.8128,14.96,0.75,100.92


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 95904
  custom_metrics: {}
  date: 2021-11-13_21-51-32
  done: false
  episode_len_mean: 100.44
  episode_media: {}
  episode_reward_max: 14.960000000000015
  episode_reward_mean: 9.889800000000019
  episode_reward_min: 0.7499999999999991
  episodes_this_iter: 20
  episodes_total: 966
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.012054267383757
          entropy_coeff: 0.009999999999999998
          kl: 0.016390518253093393
          policy_loss: 0.02939922386514289
          total_loss: 0.47891050222374143
          vf_explained_var: 0.9373710751533508
          vf_loss: 0.46471466493038904
    num_agent_steps_sampled: 95904
    num_agent_steps_trained: 95904
    num_steps_sampled: 95904
    num_steps_trained: 95904
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,48,1323.57,95904,9.8898,14.96,0.75,100.44


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 97902
  custom_metrics: {}
  date: 2021-11-13_21-51-55
  done: false
  episode_len_mean: 102.05
  episode_media: {}
  episode_reward_max: 16.579999999999988
  episode_reward_mean: 10.317100000000018
  episode_reward_min: 0.7499999999999991
  episodes_this_iter: 20
  episodes_total: 986
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.005710151649657
          entropy_coeff: 0.009999999999999998
          kl: 0.0168506960699954
          policy_loss: -0.04903369453691301
          total_loss: 0.3583350877854086
          vf_explained_var: 0.9539857506752014
          vf_loss: 0.4223706740708578
    num_agent_steps_sampled: 97902
    num_agent_steps_trained: 97902
    num_steps_sampled: 97902
    num_steps_trained: 97902
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,49,1346.26,97902,10.3171,16.58,0.75,102.05


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 99900
  custom_metrics: {}
  date: 2021-11-13_21-52-17
  done: false
  episode_len_mean: 103.42
  episode_media: {}
  episode_reward_max: 16.579999999999988
  episode_reward_mean: 10.481700000000021
  episode_reward_min: 0.7499999999999991
  episodes_this_iter: 18
  episodes_total: 1004
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.9971751905622936
          entropy_coeff: 0.009999999999999998
          kl: 0.010432108442718132
          policy_loss: -0.08750912676609698
          total_loss: 0.2918745179084085
          vf_explained_var: 0.9508228302001953
          vf_loss: 0.3962257669440338
    num_agent_steps_sampled: 99900
    num_agent_steps_trained: 99900
    num_steps_sampled: 99900
    num_steps_trained: 99900
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,50,1368.27,99900,10.4817,16.58,0.75,103.42


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 101898
  custom_metrics: {}
  date: 2021-11-13_21-52-39
  done: false
  episode_len_mean: 104.01
  episode_media: {}
  episode_reward_max: 16.579999999999988
  episode_reward_mean: 10.956900000000017
  episode_reward_min: 3.4600000000000097
  episodes_this_iter: 18
  episodes_total: 1022
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.9686478132293337
          entropy_coeff: 0.009999999999999998
          kl: 0.009381469613467436
          policy_loss: -0.008493452572396823
          total_loss: 0.3636843596895536
          vf_explained_var: 0.9585323333740234
          vf_loss: 0.38904985111384166
    num_agent_steps_sampled: 101898
    num_agent_steps_trained: 101898
    num_steps_sampled: 101898
    num_steps_trained: 101898
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,51,1390.38,101898,10.9569,16.58,3.46,104.01


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 103896
  custom_metrics: {}
  date: 2021-11-13_21-53-01
  done: false
  episode_len_mean: 105.52
  episode_media: {}
  episode_reward_max: 16.579999999999988
  episode_reward_mean: 11.31800000000002
  episode_reward_min: 7.070000000000013
  episodes_this_iter: 18
  episodes_total: 1040
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.002624391941797
          entropy_coeff: 0.009999999999999998
          kl: 0.01054113614840447
          policy_loss: -0.03129929084153402
          total_loss: 0.34779087911315615
          vf_explained_var: 0.9602723717689514
          vf_loss: 0.39595407254639126
    num_agent_steps_sampled: 103896
    num_agent_steps_trained: 103896
    num_steps_sampled: 103896
    num_steps_trained: 103896
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,52,1412.66,103896,11.318,16.58,7.07,105.52




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 105894
  custom_metrics: {}
  date: 2021-11-13_21-53-40
  done: false
  episode_len_mean: 105.28
  episode_media: {}
  episode_reward_max: 16.589999999999986
  episode_reward_mean: 11.608300000000016
  episode_reward_min: -0.05
  episodes_this_iter: 20
  episodes_total: 1060
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.9286688117753892
          entropy_coeff: 0.009999999999999998
          kl: 0.014041773168410588
          policy_loss: 0.015964301409465927
          total_loss: 0.7691049462273007
          vf_explained_var: 0.9292076230049133
          vf_loss: 0.7682147937871161
    num_agent_steps_sampled: 105894
    num_agent_steps_trained: 105894
    num_steps_sampled: 105894
    num_steps_trained: 105894
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,53,1451.31,105894,11.6083,16.59,-0.05,105.28




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 107892
  custom_metrics: {}
  date: 2021-11-13_21-54-35
  done: false
  episode_len_mean: 104.62
  episode_media: {}
  episode_reward_max: 16.700000000000014
  episode_reward_mean: 11.778400000000019
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 1080
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.9058170261837186
          entropy_coeff: 0.009999999999999998
          kl: 0.015898508982469892
          policy_loss: -0.06640237750751632
          total_loss: 0.7704279189663273
          vf_explained_var: 0.9123910069465637
          vf_loss: 0.85111890598422
    num_agent_steps_sampled: 107892
    num_agent_steps_trained: 107892
    num_steps_sampled: 107892
    num_steps_trained: 107892
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,54,1505.92,107892,11.7784,16.7,-0.06,104.62


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 109890
  custom_metrics: {}
  date: 2021-11-13_21-54-58
  done: false
  episode_len_mean: 104.69
  episode_media: {}
  episode_reward_max: 16.869999999999983
  episode_reward_mean: 12.163700000000016
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 1099
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.9428518624532791
          entropy_coeff: 0.009999999999999998
          kl: 0.014978119109153368
          policy_loss: -0.06931650596005576
          total_loss: 0.34688504434944617
          vf_explained_var: 0.953303337097168
          vf_loss: 0.4311366311851002
    num_agent_steps_sampled: 109890
    num_agent_steps_trained: 109890
    num_steps_sampled: 109890
    num_steps_trained: 109890
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,55,1529.04,109890,12.1637,16.87,-0.06,104.69


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 111888
  custom_metrics: {}
  date: 2021-11-13_21-55-20
  done: false
  episode_len_mean: 105.84
  episode_media: {}
  episode_reward_max: 16.869999999999983
  episode_reward_mean: 12.164000000000014
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 1118
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.909884478364672
          entropy_coeff: 0.009999999999999998
          kl: 0.012747264172949116
          policy_loss: -0.09826074632860365
          total_loss: 0.4259021386415476
          vf_explained_var: 0.9488880038261414
          vf_loss: 0.5394375527188892
    num_agent_steps_sampled: 111888
    num_agent_steps_trained: 111888
    num_steps_sampled: 111888
    num_steps_trained: 111888
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,56,1551.35,111888,12.164,16.87,-0.06,105.84


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 113886
  custom_metrics: {}
  date: 2021-11-13_21-55-43
  done: false
  episode_len_mean: 105.31
  episode_media: {}
  episode_reward_max: 16.869999999999983
  episode_reward_mean: 12.465300000000015
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 18
  episodes_total: 1136
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.8733776643162683
          entropy_coeff: 0.009999999999999998
          kl: 0.013008140038830622
          policy_loss: -0.01724286602721328
          total_loss: 0.4357307063593041
          vf_explained_var: 0.9518299102783203
          vf_loss: 0.46780490279197695
    num_agent_steps_sampled: 113886
    num_agent_steps_trained: 113886
    num_steps_sampled: 113886
    num_steps_trained: 113886
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,57,1573.7,113886,12.4653,16.87,-0.06,105.31


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 115884
  custom_metrics: {}
  date: 2021-11-13_21-56-04
  done: false
  episode_len_mean: 106.91
  episode_media: {}
  episode_reward_max: 18.729999999999976
  episode_reward_mean: 12.974600000000017
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 18
  episodes_total: 1154
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.8543750927561806
          entropy_coeff: 0.009999999999999998
          kl: 0.01443908043521821
          policy_loss: -0.04882249395762171
          total_loss: 0.5150115792240415
          vf_explained_var: 0.9487152695655823
          vf_loss: 0.5780461020412899
    num_agent_steps_sampled: 115884
    num_agent_steps_trained: 115884
    num_steps_sampled: 115884
    num_steps_trained: 115884
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,58,1595.15,115884,12.9746,18.73,-0.06,106.91


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 117882
  custom_metrics: {}
  date: 2021-11-13_21-56-27
  done: false
  episode_len_mean: 106.77
  episode_media: {}
  episode_reward_max: 18.729999999999976
  episode_reward_mean: 13.250000000000009
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 1173
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.8141720334688822
          entropy_coeff: 0.009999999999999998
          kl: 0.01916765816200174
          policy_loss: -0.028323547630792572
          total_loss: 0.5817268801941758
          vf_explained_var: 0.9451218247413635
          vf_loss: 0.6224418479771842
    num_agent_steps_sampled: 117882
    num_agent_steps_trained: 117882
    num_steps_sampled: 117882
    num_steps_trained: 117882
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,59,1618.28,117882,13.25,18.73,-0.06,106.77


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 119880
  custom_metrics: {}
  date: 2021-11-13_21-56-49
  done: false
  episode_len_mean: 108.46
  episode_media: {}
  episode_reward_max: 18.759999999999966
  episode_reward_mean: 13.487200000000007
  episode_reward_min: 5.150000000000011
  episodes_this_iter: 18
  episodes_total: 1191
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.8754095577058338
          entropy_coeff: 0.009999999999999998
          kl: 0.013200535261240506
          policy_loss: -0.030827534730945315
          total_loss: 0.5188736442299117
          vf_explained_var: 0.954889178276062
          vf_loss: 0.5644951121438118
    num_agent_steps_sampled: 119880
    num_agent_steps_trained: 119880
    num_steps_sampled: 119880
    num_steps_trained: 119880
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,60,1640.15,119880,13.4872,18.76,5.15,108.46


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 121878
  custom_metrics: {}
  date: 2021-11-13_21-57-11
  done: false
  episode_len_mean: 108.18
  episode_media: {}
  episode_reward_max: 20.429999999999932
  episode_reward_mean: 13.871200000000009
  episode_reward_min: 5.150000000000011
  episodes_this_iter: 19
  episodes_total: 1210
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.8498177255902972
          entropy_coeff: 0.009999999999999998
          kl: 0.011111849992017567
          policy_loss: -0.06558809677759807
          total_loss: 0.44231886624225547
          vf_explained_var: 0.9614323377609253
          vf_loss: 0.5230715839635758
    num_agent_steps_sampled: 121878
    num_agent_steps_trained: 121878
    num_steps_sampled: 121878
    num_steps_trained: 121878
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,61,1661.81,121878,13.8712,20.43,5.15,108.18


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 123876
  custom_metrics: {}
  date: 2021-11-13_21-57-33
  done: false
  episode_len_mean: 109.19
  episode_media: {}
  episode_reward_max: 20.429999999999932
  episode_reward_mean: 14.0146
  episode_reward_min: 5.150000000000011
  episodes_this_iter: 16
  episodes_total: 1226
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.8438412013508025
          entropy_coeff: 0.009999999999999998
          kl: 0.01267909554854274
          policy_loss: -0.01157809208546366
          total_loss: 0.5453249134478115
          vf_explained_var: 0.9588461518287659
          vf_loss: 0.5715376856071609
    num_agent_steps_sampled: 123876
    num_agent_steps_trained: 123876
    num_steps_sampled: 123876
    num_steps_trained: 123876
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,62,1683.68,123876,14.0146,20.43,5.15,109.19


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 125874
  custom_metrics: {}
  date: 2021-11-13_21-57-56
  done: false
  episode_len_mean: 109.04
  episode_media: {}
  episode_reward_max: 20.429999999999932
  episode_reward_mean: 14.0461
  episode_reward_min: 5.150000000000011
  episodes_this_iter: 19
  episodes_total: 1245
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.8244810047603788
          entropy_coeff: 0.009999999999999998
          kl: 0.014611875299244135
          policy_loss: -0.05834969498571895
          total_loss: 0.5557149478012607
          vf_explained_var: 0.9485534429550171
          vf_loss: 0.627925888981138
    num_agent_steps_sampled: 125874
    num_agent_steps_trained: 125874
    num_steps_sampled: 125874
    num_steps_trained: 125874
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,63,1706.8,125874,14.0461,20.43,5.15,109.04


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 127872
  custom_metrics: {}
  date: 2021-11-13_21-58-19
  done: false
  episode_len_mean: 109.89
  episode_media: {}
  episode_reward_max: 20.429999999999932
  episode_reward_mean: 13.894099999999998
  episode_reward_min: 1.940000000000019
  episodes_this_iter: 18
  episodes_total: 1263
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.8103798457554408
          entropy_coeff: 0.009999999999999998
          kl: 0.016037312845530203
          policy_loss: -0.04926043835779031
          total_loss: 0.8421541409123512
          vf_explained_var: 0.9247145652770996
          vf_loss: 0.9047071923102651
    num_agent_steps_sampled: 127872
    num_agent_steps_trained: 127872
    num_steps_sampled: 127872
    num_steps_trained: 127872
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,64,1729.38,127872,13.8941,20.43,1.94,109.89


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 129870
  custom_metrics: {}
  date: 2021-11-13_21-58-41
  done: false
  episode_len_mean: 110.08
  episode_media: {}
  episode_reward_max: 20.429999999999932
  episode_reward_mean: 13.5632
  episode_reward_min: 1.940000000000019
  episodes_this_iter: 19
  episodes_total: 1282
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.8859296094803584
          entropy_coeff: 0.009999999999999998
          kl: 0.017045966771120465
          policy_loss: -0.009073559620550701
          total_loss: 0.7267533628181333
          vf_explained_var: 0.9327951073646545
          vf_loss: 0.7495724351633163
    num_agent_steps_sampled: 129870
    num_agent_steps_trained: 129870
    num_steps_sampled: 129870
    num_steps_trained: 129870
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,65,1751.72,129870,13.5632,20.43,1.94,110.08


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 131868
  custom_metrics: {}
  date: 2021-11-13_21-59-03
  done: false
  episode_len_mean: 110.52
  episode_media: {}
  episode_reward_max: 20.429999999999932
  episode_reward_mean: 13.886999999999999
  episode_reward_min: 1.940000000000019
  episodes_this_iter: 17
  episodes_total: 1299
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.8500871754827954
          entropy_coeff: 0.009999999999999998
          kl: 0.01021945883259825
          policy_loss: -0.05555999188550881
          total_loss: 0.44098224427018845
          vf_explained_var: 0.9560024738311768
          vf_loss: 0.5119772693940572
    num_agent_steps_sampled: 131868
    num_agent_steps_trained: 131868
    num_steps_sampled: 131868
    num_steps_trained: 131868
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,66,1773.66,131868,13.887,20.43,1.94,110.52


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 133866
  custom_metrics: {}
  date: 2021-11-13_21-59-25
  done: false
  episode_len_mean: 110.89
  episode_media: {}
  episode_reward_max: 18.839999999999982
  episode_reward_mean: 14.09229999999999
  episode_reward_min: 1.940000000000019
  episodes_this_iter: 17
  episodes_total: 1316
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.8686426804179237
          entropy_coeff: 0.009999999999999998
          kl: 0.011648627068183075
          policy_loss: -0.08041211147570894
          total_loss: 0.3604199135942118
          vf_explained_var: 0.9657325148582458
          vf_loss: 0.45602386324178606
    num_agent_steps_sampled: 133866
    num_agent_steps_trained: 133866
    num_steps_sampled: 133866
    num_steps_trained: 133866
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,67,1795.28,133866,14.0923,18.84,1.94,110.89


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 135864
  custom_metrics: {}
  date: 2021-11-13_21-59-47
  done: false
  episode_len_mean: 110.92
  episode_media: {}
  episode_reward_max: 18.929999999999968
  episode_reward_mean: 14.573399999999987
  episode_reward_min: 1.940000000000019
  episodes_this_iter: 20
  episodes_total: 1336
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.7921964378583999
          entropy_coeff: 0.009999999999999998
          kl: 0.01204349857856998
          policy_loss: -0.008813671412922088
          total_loss: 0.6196134351548694
          vf_explained_var: 0.9592212438583374
          vf_loss: 0.6427360249417169
    num_agent_steps_sampled: 135864
    num_agent_steps_trained: 135864
    num_steps_sampled: 135864
    num_steps_trained: 135864
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,68,1817.71,135864,14.5734,18.93,1.94,110.92


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 137862
  custom_metrics: {}
  date: 2021-11-13_22-00-11
  done: false
  episode_len_mean: 109.74
  episode_media: {}
  episode_reward_max: 18.929999999999968
  episode_reward_mean: 15.159399999999982
  episode_reward_min: 1.940000000000019
  episodes_this_iter: 18
  episodes_total: 1354
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.821025740532648
          entropy_coeff: 0.009999999999999998
          kl: 0.011585952133837736
          policy_loss: -0.026929066791420892
          total_loss: 0.391136464387888
          vf_explained_var: 0.971234142780304
          vf_loss: 0.43280000388622286
    num_agent_steps_sampled: 137862
    num_agent_steps_trained: 137862
    num_steps_sampled: 137862
    num_steps_trained: 137862
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,69,1841.36,137862,15.1594,18.93,1.94,109.74


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 139860
  custom_metrics: {}
  date: 2021-11-13_22-00-34
  done: false
  episode_len_mean: 109.42
  episode_media: {}
  episode_reward_max: 20.789999999999946
  episode_reward_mean: 15.708599999999976
  episode_reward_min: 7.890000000000024
  episodes_this_iter: 19
  episodes_total: 1373
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.8742527479217166
          entropy_coeff: 0.009999999999999998
          kl: 0.010933333701677804
          policy_loss: 0.02700618931225368
          total_loss: 0.6021633629287992
          vf_explained_var: 0.9630308151245117
          vf_loss: 0.5906197032758168
    num_agent_steps_sampled: 139860
    num_agent_steps_trained: 139860
    num_steps_sampled: 139860
    num_steps_trained: 139860
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,70,1864.22,139860,15.7086,20.79,7.89,109.42


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 141858
  custom_metrics: {}
  date: 2021-11-13_22-00-57
  done: false
  episode_len_mean: 108.45
  episode_media: {}
  episode_reward_max: 20.789999999999946
  episode_reward_mean: 16.002299999999977
  episode_reward_min: 8.21000000000002
  episodes_this_iter: 18
  episodes_total: 1391
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.7753831187884013
          entropy_coeff: 0.009999999999999998
          kl: 0.014639347125546888
          policy_loss: 0.02907737823469298
          total_loss: 0.437026486687717
          vf_explained_var: 0.9749583005905151
          vf_loss: 0.42131114133766717
    num_agent_steps_sampled: 141858
    num_agent_steps_trained: 141858
    num_steps_sampled: 141858
    num_steps_trained: 141858
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,71,1887.4,141858,16.0023,20.79,8.21,108.45


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 143856
  custom_metrics: {}
  date: 2021-11-13_22-01-20
  done: false
  episode_len_mean: 107.45
  episode_media: {}
  episode_reward_max: 20.789999999999946
  episode_reward_mean: 16.06929999999997
  episode_reward_min: 8.21000000000002
  episodes_this_iter: 20
  episodes_total: 1411
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.8172982914107187
          entropy_coeff: 0.009999999999999998
          kl: 0.01034252843307333
          policy_loss: -0.12201774840553602
          total_loss: 0.4468080009378138
          vf_explained_var: 0.9663057327270508
          vf_loss: 0.583895969532785
    num_agent_steps_sampled: 143856
    num_agent_steps_trained: 143856
    num_steps_sampled: 143856
    num_steps_trained: 143856
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,72,1910.33,143856,16.0693,20.79,8.21,107.45




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 145854
  custom_metrics: {}
  date: 2021-11-13_22-02-11
  done: false
  episode_len_mean: 106.3
  episode_media: {}
  episode_reward_max: 20.789999999999946
  episode_reward_mean: 16.046999999999972
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 1430
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.7662285231408619
          entropy_coeff: 0.009999999999999998
          kl: 0.011125788104461064
          policy_loss: 0.0022801146266006288
          total_loss: 0.8733092515152835
          vf_explained_var: 0.9521196484565735
          vf_loss: 0.8853536918049767
    num_agent_steps_sampled: 145854
    num_agent_steps_trained: 145854
    num_steps_sampled: 145854
    num_steps_trained: 145854
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,73,1961.53,145854,16.047,20.79,-0.06,106.3




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 147852
  custom_metrics: {}
  date: 2021-11-13_22-02-50
  done: false
  episode_len_mean: 105.21
  episode_media: {}
  episode_reward_max: 20.789999999999946
  episode_reward_mean: 16.035399999999967
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 1449
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.7458575288454692
          entropy_coeff: 0.009999999999999998
          kl: 0.00986137397715217
          policy_loss: -0.021404257647338366
          total_loss: 1.0076447784545877
          vf_explained_var: 0.9427377581596375
          vf_loss: 1.043549187694277
    num_agent_steps_sampled: 147852
    num_agent_steps_trained: 147852
    num_steps_sampled: 147852
    num_steps_trained: 147852
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,74,2000.35,147852,16.0354,20.79,-0.06,105.21


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 149850
  custom_metrics: {}
  date: 2021-11-13_22-03-14
  done: false
  episode_len_mean: 104.71
  episode_media: {}
  episode_reward_max: 20.789999999999946
  episode_reward_mean: 16.22099999999996
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 18
  episodes_total: 1467
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.73950069972447
          entropy_coeff: 0.009999999999999998
          kl: 0.010102733463068957
          policy_loss: 0.025000469776846113
          total_loss: 0.5291260911950043
          vf_explained_var: 0.9702696204185486
          vf_loss: 0.5184898108243943
    num_agent_steps_sampled: 149850
    num_agent_steps_trained: 149850
    num_steps_sampled: 149850
    num_steps_trained: 149850
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,75,2024.06,149850,16.221,20.79,-0.06,104.71


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 151848
  custom_metrics: {}
  date: 2021-11-13_22-03-36
  done: false
  episode_len_mean: 105.44
  episode_media: {}
  episode_reward_max: 20.81999999999994
  episode_reward_mean: 16.346499999999963
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 18
  episodes_total: 1485
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.7162161838440668
          entropy_coeff: 0.009999999999999998
          kl: 0.018113779360524634
          policy_loss: 0.006635627540804091
          total_loss: 0.5062316156391586
          vf_explained_var: 0.9744072556495667
          vf_loss: 0.5113240144792057
    num_agent_steps_sampled: 151848
    num_agent_steps_trained: 151848
    num_steps_sampled: 151848
    num_steps_trained: 151848
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,76,2046.65,151848,16.3465,20.82,-0.06,105.44


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 153846
  custom_metrics: {}
  date: 2021-11-13_22-03-59
  done: false
  episode_len_mean: 105.45
  episode_media: {}
  episode_reward_max: 20.81999999999994
  episode_reward_mean: 16.465899999999962
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 1506
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.6781271162487212
          entropy_coeff: 0.009999999999999998
          kl: 0.01101572010217289
          policy_loss: -0.02873067671344394
          total_loss: 0.5291785389805833
          vf_explained_var: 0.9742415547370911
          vf_loss: 0.5713857703975269
    num_agent_steps_sampled: 153846
    num_agent_steps_trained: 153846
    num_steps_sampled: 153846
    num_steps_trained: 153846
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,77,2069.75,153846,16.4659,20.82,-0.06,105.45


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 155844
  custom_metrics: {}
  date: 2021-11-13_22-04-22
  done: false
  episode_len_mean: 106.38
  episode_media: {}
  episode_reward_max: 20.859999999999957
  episode_reward_mean: 16.94699999999996
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 18
  episodes_total: 1524
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.658703096707662
          entropy_coeff: 0.009999999999999998
          kl: 0.010575411363749792
          policy_loss: -0.033163808108795254
          total_loss: 0.3603851248022346
          vf_explained_var: 0.9779025316238403
          vf_loss: 0.4069633393060593
    num_agent_steps_sampled: 155844
    num_agent_steps_trained: 155844
    num_steps_sampled: 155844
    num_steps_trained: 155844
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,78,2092.11,155844,16.947,20.86,-0.06,106.38


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 157842
  custom_metrics: {}
  date: 2021-11-13_22-04-44
  done: false
  episode_len_mean: 108.35
  episode_media: {}
  episode_reward_max: 20.859999999999957
  episode_reward_mean: 17.42029999999995
  episode_reward_min: 11.780000000000022
  episodes_this_iter: 18
  episodes_total: 1542
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.7207683988979885
          entropy_coeff: 0.009999999999999998
          kl: 0.012002077051425981
          policy_loss: -0.06273158526136761
          total_loss: 0.4332795441061968
          vf_explained_var: 0.9737151861190796
          vf_loss: 0.509618189008463
    num_agent_steps_sampled: 157842
    num_agent_steps_trained: 157842
    num_steps_sampled: 157842
    num_steps_trained: 157842
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,79,2113.94,157842,17.4203,20.86,11.78,108.35


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 159840
  custom_metrics: {}
  date: 2021-11-13_22-05-06
  done: false
  episode_len_mean: 109.38
  episode_media: {}
  episode_reward_max: 20.859999999999957
  episode_reward_mean: 17.54189999999995
  episode_reward_min: 11.780000000000022
  episodes_this_iter: 17
  episodes_total: 1559
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.6983579964864821
          entropy_coeff: 0.009999999999999998
          kl: 0.013086715188312071
          policy_loss: -0.027625643852211182
          total_loss: 0.5877295117692224
          vf_explained_var: 0.9740790128707886
          vf_loss: 0.6284127264505341
    num_agent_steps_sampled: 159840
    num_agent_steps_trained: 159840
    num_steps_sampled: 159840
    num_steps_trained: 159840
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,80,2136.05,159840,17.5419,20.86,11.78,109.38


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 161838
  custom_metrics: {}
  date: 2021-11-13_22-05-28
  done: false
  episode_len_mean: 110.08
  episode_media: {}
  episode_reward_max: 20.859999999999957
  episode_reward_mean: 17.68599999999995
  episode_reward_min: 10.300000000000024
  episodes_this_iter: 18
  episodes_total: 1577
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.699652373790741
          entropy_coeff: 0.009999999999999998
          kl: 0.010149490428813952
          policy_loss: -0.028372498459759213
          total_loss: 0.45921505549922587
          vf_explained_var: 0.9759918451309204
          vf_loss: 0.5015392311272167
    num_agent_steps_sampled: 161838
    num_agent_steps_trained: 161838
    num_steps_sampled: 161838
    num_steps_trained: 161838
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,81,2158.55,161838,17.686,20.86,10.3,110.08


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 163836
  custom_metrics: {}
  date: 2021-11-13_22-05-50
  done: false
  episode_len_mean: 110.2
  episode_media: {}
  episode_reward_max: 20.859999999999957
  episode_reward_mean: 17.884499999999942
  episode_reward_min: 10.300000000000024
  episodes_this_iter: 18
  episodes_total: 1595
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.6811317897978284
          entropy_coeff: 0.009999999999999998
          kl: 0.009583615844240759
          policy_loss: -0.008329885098196212
          total_loss: 0.45870009644755294
          vf_explained_var: 0.9769161939620972
          vf_loss: 0.4809662149066017
    num_agent_steps_sampled: 163836
    num_agent_steps_trained: 163836
    num_steps_sampled: 163836
    num_steps_trained: 163836
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,82,2180.19,163836,17.8845,20.86,10.3,110.2


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 165834
  custom_metrics: {}
  date: 2021-11-13_22-06-12
  done: false
  episode_len_mean: 110.97
  episode_media: {}
  episode_reward_max: 20.85999999999994
  episode_reward_mean: 17.729299999999945
  episode_reward_min: 9.470000000000024
  episodes_this_iter: 18
  episodes_total: 1613
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.6904172460238138
          entropy_coeff: 0.009999999999999998
          kl: 0.010373251023614579
          policy_loss: -0.00014343183665048507
          total_loss: 0.4445294025753226
          vf_explained_var: 0.9809646606445312
          vf_loss: 0.458465031853744
    num_agent_steps_sampled: 165834
    num_agent_steps_trained: 165834
    num_steps_sampled: 165834
    num_steps_trained: 165834
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,83,2202.2,165834,17.7293,20.86,9.47,110.97


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 167832
  custom_metrics: {}
  date: 2021-11-13_22-06-35
  done: false
  episode_len_mean: 110.97
  episode_media: {}
  episode_reward_max: 20.85999999999994
  episode_reward_mean: 17.78169999999994
  episode_reward_min: 9.470000000000024
  episodes_this_iter: 18
  episodes_total: 1631
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.6412416900907243
          entropy_coeff: 0.009999999999999998
          kl: 0.013338606762541769
          policy_loss: 0.009395441377446766
          total_loss: 0.6029155730890731
          vf_explained_var: 0.9729859828948975
          vf_loss: 0.6059309645068078
    num_agent_steps_sampled: 167832
    num_agent_steps_trained: 167832
    num_steps_sampled: 167832
    num_steps_trained: 167832
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,84,2224.94,167832,17.7817,20.86,9.47,110.97


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 169830
  custom_metrics: {}
  date: 2021-11-13_22-06-57
  done: false
  episode_len_mean: 111.32
  episode_media: {}
  episode_reward_max: 20.67999999999995
  episode_reward_mean: 17.905799999999935
  episode_reward_min: 9.470000000000024
  episodes_this_iter: 18
  episodes_total: 1649
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.6653145284879776
          entropy_coeff: 0.009999999999999998
          kl: 0.012412244953913694
          policy_loss: 0.009585922459761302
          total_loss: 0.4620588656780975
          vf_explained_var: 0.9803516268730164
          vf_loss: 0.4654024131241299
    num_agent_steps_sampled: 169830
    num_agent_steps_trained: 169830
    num_steps_sampled: 169830
    num_steps_trained: 169830
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,85,2246.83,169830,17.9058,20.68,9.47,111.32


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 171828
  custom_metrics: {}
  date: 2021-11-13_22-07-19
  done: false
  episode_len_mean: 111.09
  episode_media: {}
  episode_reward_max: 20.67999999999995
  episode_reward_mean: 17.94969999999994
  episode_reward_min: 9.470000000000024
  episodes_this_iter: 18
  episodes_total: 1667
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.6158346306710016
          entropy_coeff: 0.009999999999999998
          kl: 0.011185788028411602
          policy_loss: 0.008360119376863753
          total_loss: 0.3971852244010993
          vf_explained_var: 0.9799082279205322
          vf_loss: 0.4016277162092073
    num_agent_steps_sampled: 171828
    num_agent_steps_trained: 171828
    num_steps_sampled: 171828
    num_steps_trained: 171828
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,86,2269.08,171828,17.9497,20.68,9.47,111.09


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 173826
  custom_metrics: {}
  date: 2021-11-13_22-07-41
  done: false
  episode_len_mean: 110.95
  episode_media: {}
  episode_reward_max: 20.78999999999995
  episode_reward_mean: 18.119999999999937
  episode_reward_min: 9.470000000000024
  episodes_this_iter: 18
  episodes_total: 1685
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.6272864693687075
          entropy_coeff: 0.009999999999999998
          kl: 0.010317403862244032
          policy_loss: 0.035961571974413735
          total_loss: 0.45120665438118435
          vf_explained_var: 0.9771029949188232
          vf_loss: 0.42842272541352683
    num_agent_steps_sampled: 173826
    num_agent_steps_trained: 173826
    num_steps_sampled: 173826
    num_steps_trained: 173826
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,87,2291.55,173826,18.12,20.79,9.47,110.95


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 175824
  custom_metrics: {}
  date: 2021-11-13_22-08-04
  done: false
  episode_len_mean: 110.81
  episode_media: {}
  episode_reward_max: 20.78999999999995
  episode_reward_mean: 18.26089999999994
  episode_reward_min: 9.470000000000024
  episodes_this_iter: 18
  episodes_total: 1703
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.6301525632540386
          entropy_coeff: 0.009999999999999998
          kl: 0.011007087087960889
          policy_loss: -0.038515881519942056
          total_loss: 0.3406763289655958
          vf_explained_var: 0.9807603359222412
          vf_loss: 0.3921916091016361
    num_agent_steps_sampled: 175824
    num_agent_steps_trained: 175824
    num_steps_sampled: 175824
    num_steps_trained: 175824
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,88,2314.36,175824,18.2609,20.79,9.47,110.81


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 177822
  custom_metrics: {}
  date: 2021-11-13_22-08-26
  done: false
  episode_len_mean: 110.46
  episode_media: {}
  episode_reward_max: 20.78999999999995
  episode_reward_mean: 18.504299999999937
  episode_reward_min: 11.730000000000022
  episodes_this_iter: 19
  episodes_total: 1722
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.5636804399036226
          entropy_coeff: 0.009999999999999998
          kl: 0.010983583863145093
          policy_loss: -0.022884496301412583
          total_loss: 0.44435096212795805
          vf_explained_var: 0.9767658114433289
          vf_loss: 0.4795771909611566
    num_agent_steps_sampled: 177822
    num_agent_steps_trained: 177822
    num_steps_sampled: 177822
    num_steps_trained: 177822
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,89,2336.52,177822,18.5043,20.79,11.73,110.46


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 179820
  custom_metrics: {}
  date: 2021-11-13_22-08-49
  done: false
  episode_len_mean: 110.43
  episode_media: {}
  episode_reward_max: 20.78999999999995
  episode_reward_mean: 18.562999999999935
  episode_reward_min: 14.06000000000002
  episodes_this_iter: 18
  episodes_total: 1740
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.6445948123931884
          entropy_coeff: 0.009999999999999998
          kl: 0.01734739193024358
          policy_loss: 0.003134311842066901
          total_loss: 0.5362420300376557
          vf_explained_var: 0.9774152636528015
          vf_loss: 0.54434945193075
    num_agent_steps_sampled: 179820
    num_agent_steps_trained: 179820
    num_steps_sampled: 179820
    num_steps_trained: 179820
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,90,2358.64,179820,18.563,20.79,14.06,110.43


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 181818
  custom_metrics: {}
  date: 2021-11-13_22-09-11
  done: false
  episode_len_mean: 109.56
  episode_media: {}
  episode_reward_max: 20.78999999999995
  episode_reward_mean: 18.543999999999933
  episode_reward_min: 14.050000000000022
  episodes_this_iter: 19
  episodes_total: 1759
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.5570055132820493
          entropy_coeff: 0.009999999999999998
          kl: 0.009947780976052336
          policy_loss: -0.0413735456764698
          total_loss: 0.3479982505951609
          vf_explained_var: 0.9814622402191162
          vf_loss: 0.40195751761396725
    num_agent_steps_sampled: 181818
    num_agent_steps_trained: 181818
    num_steps_sampled: 181818
    num_steps_trained: 181818
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,91,2381.36,181818,18.544,20.79,14.05,109.56




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 183816
  custom_metrics: {}
  date: 2021-11-13_22-09-51
  done: false
  episode_len_mean: 107.7
  episode_media: {}
  episode_reward_max: 20.759999999999927
  episode_reward_mean: 18.44069999999994
  episode_reward_min: -0.15000000000000002
  episodes_this_iter: 19
  episodes_total: 1778
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.507086197535197
          entropy_coeff: 0.009999999999999998
          kl: 0.013447636193266607
          policy_loss: -0.04189669347944714
          total_loss: 0.9921809265301341
          vf_explained_var: 0.9524948000907898
          vf_loss: 1.045114207409677
    num_agent_steps_sampled: 183816
    num_agent_steps_trained: 183816
    num_steps_sampled: 183816
    num_steps_trained: 183816
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,92,2420.96,183816,18.4407,20.76,-0.15,107.7




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 185814
  custom_metrics: {}
  date: 2021-11-13_22-10-42
  done: false
  episode_len_mean: 106.04
  episode_media: {}
  episode_reward_max: 20.759999999999927
  episode_reward_mean: 18.05069999999994
  episode_reward_min: -0.15000000000000002
  episodes_this_iter: 20
  episodes_total: 1798
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.6271198119435992
          entropy_coeff: 0.009999999999999998
          kl: 0.010149403134017436
          policy_loss: 0.007362799044875872
          total_loss: 1.1681991689261937
          vf_explained_var: 0.9494108557701111
          vf_loss: 1.1740627219989186
    num_agent_steps_sampled: 185814
    num_agent_steps_trained: 185814
    num_steps_sampled: 185814
    num_steps_trained: 185814
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,93,2471.76,185814,18.0507,20.76,-0.15,106.04


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 187812
  custom_metrics: {}
  date: 2021-11-13_22-11-05
  done: false
  episode_len_mean: 106.53
  episode_media: {}
  episode_reward_max: 20.689999999999962
  episode_reward_mean: 17.820899999999934
  episode_reward_min: -0.15000000000000002
  episodes_this_iter: 18
  episodes_total: 1816
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.587763362271445
          entropy_coeff: 0.009999999999999998
          kl: 0.019369949315071282
          policy_loss: -0.017171752701203028
          total_loss: 0.4551731254905462
          vf_explained_var: 0.9803612232208252
          vf_loss: 0.4824115271369616
    num_agent_steps_sampled: 187812
    num_agent_steps_trained: 187812
    num_steps_sampled: 187812
    num_steps_trained: 187812
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,94,2495.13,187812,17.8209,20.69,-0.15,106.53


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 189810
  custom_metrics: {}
  date: 2021-11-13_22-11-27
  done: false
  episode_len_mean: 106.29
  episode_media: {}
  episode_reward_max: 20.749999999999943
  episode_reward_mean: 17.910999999999934
  episode_reward_min: -0.15000000000000002
  episodes_this_iter: 18
  episodes_total: 1834
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.5484186677705674
          entropy_coeff: 0.009999999999999998
          kl: 0.010369657202405683
          policy_loss: 0.011159782626089595
          total_loss: 0.34946475965636115
          vf_explained_var: 0.9848112463951111
          vf_loss: 0.3506782669041838
    num_agent_steps_sampled: 189810
    num_agent_steps_trained: 189810
    num_steps_sampled: 189810
    num_steps_trained: 189810
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,95,2517.38,189810,17.911,20.75,-0.15,106.29


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 191808
  custom_metrics: {}
  date: 2021-11-13_22-11-49
  done: false
  episode_len_mean: 106.76
  episode_media: {}
  episode_reward_max: 20.749999999999943
  episode_reward_mean: 17.996399999999934
  episode_reward_min: -0.15000000000000002
  episodes_this_iter: 19
  episodes_total: 1853
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.5677674770355225
          entropy_coeff: 0.009999999999999998
          kl: 0.013673470481801
          policy_loss: -0.06141674064454578
          total_loss: 0.2899356407778604
          vf_explained_var: 0.9847626686096191
          vf_loss: 0.3629280169095312
    num_agent_steps_sampled: 191808
    num_agent_steps_trained: 191808
    num_steps_sampled: 191808
    num_steps_trained: 191808
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,96,2539.27,191808,17.9964,20.75,-0.15,106.76


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 193806
  custom_metrics: {}
  date: 2021-11-13_22-12-12
  done: false
  episode_len_mean: 107.78
  episode_media: {}
  episode_reward_max: 20.749999999999943
  episode_reward_mean: 18.30919999999993
  episode_reward_min: 1.94
  episodes_this_iter: 18
  episodes_total: 1871
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.5282579109782264
          entropy_coeff: 0.009999999999999998
          kl: 0.015040780546745038
          policy_loss: -0.02141638029189337
          total_loss: 0.23592339061378015
          vf_explained_var: 0.9892106056213379
          vf_loss: 0.2681101145488875
    num_agent_steps_sampled: 193806
    num_agent_steps_trained: 193806
    num_steps_sampled: 193806
    num_steps_trained: 193806
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,97,2561.42,193806,18.3092,20.75,1.94,107.78


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 195804
  custom_metrics: {}
  date: 2021-11-13_22-12-33
  done: false
  episode_len_mean: 108.11
  episode_media: {}
  episode_reward_max: 20.749999999999943
  episode_reward_mean: 18.49389999999993
  episode_reward_min: 1.94
  episodes_this_iter: 18
  episodes_total: 1889
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.5954686068353199
          entropy_coeff: 0.009999999999999998
          kl: 0.00869668973079626
          policy_loss: -0.0515350211056925
          total_loss: 0.18329534337279343
          vf_explained_var: 0.9904111623764038
          vf_loss: 0.24817604191956066
    num_agent_steps_sampled: 195804
    num_agent_steps_trained: 195804
    num_steps_sampled: 195804
    num_steps_trained: 195804
  iterations_since_restore: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,98,2582.84,195804,18.4939,20.75,1.94,108.11


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 197802
  custom_metrics: {}
  date: 2021-11-13_22-12-55
  done: false
  episode_len_mean: 109.84
  episode_media: {}
  episode_reward_max: 20.749999999999943
  episode_reward_mean: 18.943299999999926
  episode_reward_min: 9.630000000000024
  episodes_this_iter: 18
  episodes_total: 1907
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.4782796553203037
          entropy_coeff: 0.009999999999999998
          kl: 0.013192113647766693
          policy_loss: -0.0025401452822344643
          total_loss: 0.2685438723968608
          vf_explained_var: 0.9900880455970764
          vf_loss: 0.28190918295156386
    num_agent_steps_sampled: 197802
    num_agent_steps_trained: 197802
    num_steps_sampled: 197802
    num_steps_trained: 197802
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,99,2605.07,197802,18.9433,20.75,9.63,109.84


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 199800
  custom_metrics: {}
  date: 2021-11-13_22-13-18
  done: false
  episode_len_mean: 109.09
  episode_media: {}
  episode_reward_max: 20.909999999999926
  episode_reward_mean: 19.276699999999927
  episode_reward_min: 12.960000000000015
  episodes_this_iter: 18
  episodes_total: 1925
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.5269197901089986
          entropy_coeff: 0.009999999999999998
          kl: 0.009688930504033699
          policy_loss: -0.042059803452520146
          total_loss: 0.16226651954154173
          vf_explained_var: 0.9917386770248413
          vf_loss: 0.21668884051697596
    num_agent_steps_sampled: 199800
    num_agent_steps_trained: 199800
    num_steps_sampled: 199800
    num_steps_trained: 199800
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,100,2627.29,199800,19.2767,20.91,12.96,109.09


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 201798
  custom_metrics: {}
  date: 2021-11-13_22-13-40
  done: false
  episode_len_mean: 109.06
  episode_media: {}
  episode_reward_max: 20.909999999999926
  episode_reward_mean: 19.34339999999992
  episode_reward_min: 11.890000000000025
  episodes_this_iter: 19
  episodes_total: 1944
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.52400822752998
          entropy_coeff: 0.009999999999999998
          kl: 0.016878669145814703
          policy_loss: 0.011668770902213596
          total_loss: 0.38363510964970504
          vf_explained_var: 0.9859105944633484
          vf_loss: 0.38214282020926477
    num_agent_steps_sampled: 201798
    num_agent_steps_trained: 201798
    num_steps_sampled: 201798
    num_steps_trained: 201798
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,101,2649.49,201798,19.3434,20.91,11.89,109.06


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 203796
  custom_metrics: {}
  date: 2021-11-13_22-14-03
  done: false
  episode_len_mean: 108.8
  episode_media: {}
  episode_reward_max: 20.929999999999968
  episode_reward_mean: 19.24699999999992
  episode_reward_min: 9.70000000000002
  episodes_this_iter: 18
  episodes_total: 1962
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.4870219798315139
          entropy_coeff: 0.009999999999999998
          kl: 0.01152914193653896
          policy_loss: -0.03231454566121102
          total_loss: 0.34802953935272635
          vf_explained_var: 0.9847089648246765
          vf_loss: 0.39175556263043765
    num_agent_steps_sampled: 203796
    num_agent_steps_trained: 203796
    num_steps_sampled: 203796
    num_steps_trained: 203796
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,102,2672.22,203796,19.247,20.93,9.7,108.8


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 205794
  custom_metrics: {}
  date: 2021-11-13_22-14-25
  done: false
  episode_len_mean: 109.25
  episode_media: {}
  episode_reward_max: 20.929999999999968
  episode_reward_mean: 19.29759999999992
  episode_reward_min: 9.70000000000002
  episodes_this_iter: 18
  episodes_total: 1980
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.554200349535261
          entropy_coeff: 0.009999999999999998
          kl: 0.009580686095933265
          policy_loss: 0.008197527325579098
          total_loss: 0.159075866338043
          vf_explained_var: 0.9943326115608215
          vf_loss: 0.1635461386115778
    num_agent_steps_sampled: 205794
    num_agent_steps_trained: 205794
    num_steps_sampled: 205794
    num_steps_trained: 205794
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,103,2694.69,205794,19.2976,20.93,9.7,109.25


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 207792
  custom_metrics: {}
  date: 2021-11-13_22-14-47
  done: false
  episode_len_mean: 108.92
  episode_media: {}
  episode_reward_max: 20.929999999999968
  episode_reward_mean: 19.406199999999924
  episode_reward_min: 9.70000000000002
  episodes_this_iter: 19
  episodes_total: 1999
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.5240440380005609
          entropy_coeff: 0.009999999999999998
          kl: 0.00912114228493261
          policy_loss: -0.06000282986178285
          total_loss: 0.09438596131900946
          vf_explained_var: 0.9943873286247253
          vf_loss: 0.1668928872616518
    num_agent_steps_sampled: 207792
    num_agent_steps_trained: 207792
    num_steps_sampled: 207792
    num_steps_trained: 207792
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,104,2716.75,207792,19.4062,20.93,9.7,108.92


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 209790
  custom_metrics: {}
  date: 2021-11-13_22-15-08
  done: false
  episode_len_mean: 109.75
  episode_media: {}
  episode_reward_max: 20.929999999999968
  episode_reward_mean: 19.383399999999924
  episode_reward_min: 9.70000000000002
  episodes_this_iter: 17
  episodes_total: 2016
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.5985376250176202
          entropy_coeff: 0.009999999999999998
          kl: 0.01393891516686539
          policy_loss: 0.018402711018210366
          total_loss: 0.3349499130178066
          vf_explained_var: 0.9890633821487427
          vf_loss: 0.32835090522255217
    num_agent_steps_sampled: 209790
    num_agent_steps_trained: 209790
    num_steps_sampled: 209790
    num_steps_trained: 209790
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,105,2737.68,209790,19.3834,20.93,9.7,109.75


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 211788
  custom_metrics: {}
  date: 2021-11-13_22-15-31
  done: false
  episode_len_mean: 110.11
  episode_media: {}
  episode_reward_max: 20.929999999999968
  episode_reward_mean: 19.37099999999992
  episode_reward_min: 9.70000000000002
  episodes_this_iter: 19
  episodes_total: 2035
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.4817906691914513
          entropy_coeff: 0.009999999999999998
          kl: 0.010879107619977546
          policy_loss: 0.0037458151312811036
          total_loss: 0.3545135883348329
          vf_explained_var: 0.9862349629402161
          vf_loss: 0.3623219486503374
    num_agent_steps_sampled: 211788
    num_agent_steps_trained: 211788
    num_steps_sampled: 211788
    num_steps_trained: 211788
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,106,2760.54,211788,19.371,20.93,9.7,110.11


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 213786
  custom_metrics: {}
  date: 2021-11-13_22-15-53
  done: false
  episode_len_mean: 110.56
  episode_media: {}
  episode_reward_max: 20.929999999999968
  episode_reward_mean: 19.549199999999917
  episode_reward_min: 13.890000000000024
  episodes_this_iter: 18
  episodes_total: 2053
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.5107008054142907
          entropy_coeff: 0.009999999999999998
          kl: 0.01004050987229074
          policy_loss: -0.03623255267739296
          total_loss: 0.14373347308664094
          vf_explained_var: 0.9927672147750854
          vf_loss: 0.19206087557332857
    num_agent_steps_sampled: 213786
    num_agent_steps_trained: 213786
    num_steps_sampled: 213786
    num_steps_trained: 213786
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,107,2782.86,213786,19.5492,20.93,13.89,110.56


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 215784
  custom_metrics: {}
  date: 2021-11-13_22-16-15
  done: false
  episode_len_mean: 110.96
  episode_media: {}
  episode_reward_max: 20.73999999999992
  episode_reward_mean: 19.479899999999915
  episode_reward_min: 13.890000000000024
  episodes_this_iter: 18
  episodes_total: 2071
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.5195326702935354
          entropy_coeff: 0.009999999999999998
          kl: 0.010093859801959718
          policy_loss: -0.01603868692403748
          total_loss: 0.18660244890266942
          vf_explained_var: 0.9919371008872986
          vf_loss: 0.2148083054593631
    num_agent_steps_sampled: 215784
    num_agent_steps_trained: 215784
    num_steps_sampled: 215784
    num_steps_trained: 215784
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,108,2804.47,215784,19.4799,20.74,13.89,110.96


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 217782
  custom_metrics: {}
  date: 2021-11-13_22-16-37
  done: false
  episode_len_mean: 110.88
  episode_media: {}
  episode_reward_max: 20.749999999999936
  episode_reward_mean: 19.464999999999918
  episode_reward_min: 13.890000000000024
  episodes_this_iter: 18
  episodes_total: 2089
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.491561802228292
          entropy_coeff: 0.009999999999999998
          kl: 0.009141465747854714
          policy_loss: -0.02100204144205366
          total_loss: 0.14567583020599115
          vf_explained_var: 0.9940244555473328
          vf_loss: 0.17885104996107873
    num_agent_steps_sampled: 217782
    num_agent_steps_trained: 217782
    num_steps_sampled: 217782
    num_steps_trained: 217782
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,109,2826.82,217782,19.465,20.75,13.89,110.88


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 219780
  custom_metrics: {}
  date: 2021-11-13_22-17-00
  done: false
  episode_len_mean: 109.7
  episode_media: {}
  episode_reward_max: 20.749999999999936
  episode_reward_mean: 19.284899999999922
  episode_reward_min: 11.870000000000022
  episodes_this_iter: 18
  episodes_total: 2107
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.4964484731356302
          entropy_coeff: 0.009999999999999998
          kl: 0.021917364156590825
          policy_loss: 0.015585773314038912
          total_loss: 0.5038386098242231
          vf_explained_var: 0.9821887016296387
          vf_loss: 0.49664211386726015
    num_agent_steps_sampled: 219780
    num_agent_steps_trained: 219780
    num_steps_sampled: 219780
    num_steps_trained: 219780
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,110,2849.45,219780,19.2849,20.75,11.87,109.7




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 221778
  custom_metrics: {}
  date: 2021-11-13_22-17-38
  done: false
  episode_len_mean: 108.55
  episode_media: {}
  episode_reward_max: 20.749999999999936
  episode_reward_mean: 19.12289999999992
  episode_reward_min: -0.16000000000000003
  episodes_this_iter: 19
  episodes_total: 2126
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.5449711703118825
          entropy_coeff: 0.009999999999999998
          kl: 0.013335161404729116
          policy_loss: -0.013617381524472009
          total_loss: 0.9061789330095052
          vf_explained_var: 0.9664522409439087
          vf_loss: 0.9292452128160568
    num_agent_steps_sampled: 221778
    num_agent_steps_trained: 221778
    num_steps_sampled: 221778
    num_steps_trained: 2217

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,111,2887.16,221778,19.1229,20.75,-0.16,108.55




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 223776
  custom_metrics: {}
  date: 2021-11-13_22-18-15
  done: false
  episode_len_mean: 107.8
  episode_media: {}
  episode_reward_max: 20.749999999999936
  episode_reward_mean: 18.78529999999992
  episode_reward_min: -0.16000000000000003
  episodes_this_iter: 19
  episodes_total: 2145
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.5275387236050197
          entropy_coeff: 0.009999999999999998
          kl: 0.012263872481979512
          policy_loss: -0.01457340351882435
          total_loss: 0.3166519745652165
          vf_explained_var: 0.9884583353996277
          vf_loss: 0.34098202383943965
    num_agent_steps_sampled: 223776
    num_agent_steps_trained: 223776
    num_steps_sampled: 223776
    num_steps_trained: 22377

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,112,2924.03,223776,18.7853,20.75,-0.16,107.8




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 225774
  custom_metrics: {}
  date: 2021-11-13_22-18-52
  done: false
  episode_len_mean: 106.72
  episode_media: {}
  episode_reward_max: 20.749999999999936
  episode_reward_mean: 18.166699999999924
  episode_reward_min: -0.16000000000000003
  episodes_this_iter: 19
  episodes_total: 2164
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.5534823394957042
          entropy_coeff: 0.009999999999999998
          kl: 0.01479493187675393
          policy_loss: -0.039361762148993354
          total_loss: 1.0276805518549823
          vf_explained_var: 0.9555119276046753
          vf_loss: 1.0759194061869666
    num_agent_steps_sampled: 225774
    num_agent_steps_trained: 225774
    num_steps_sampled: 225774
    num_steps_trained: 2257

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,113,2961.71,225774,18.1667,20.75,-0.16,106.72


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 227772
  custom_metrics: {}
  date: 2021-11-13_22-19-14
  done: false
  episode_len_mean: 107.53
  episode_media: {}
  episode_reward_max: 20.749999999999936
  episode_reward_mean: 18.053599999999925
  episode_reward_min: -0.16000000000000003
  episodes_this_iter: 17
  episodes_total: 2181
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.6349083048956734
          entropy_coeff: 0.009999999999999998
          kl: 0.009524544940234525
          policy_loss: -0.032600074421082224
          total_loss: 0.29666149026403826
          vf_explained_var: 0.9875156283378601
          vf_loss: 0.3413245990517594
    num_agent_steps_sampled: 227772
    num_agent_steps_trained: 227772
    num_steps_sampled: 227772
    num_steps_trained: 22

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,114,2983.02,227772,18.0536,20.75,-0.16,107.53


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 229770
  custom_metrics: {}
  date: 2021-11-13_22-19-36
  done: false
  episode_len_mean: 108.06
  episode_media: {}
  episode_reward_max: 20.98999999999995
  episode_reward_mean: 18.260699999999925
  episode_reward_min: -0.16000000000000003
  episodes_this_iter: 19
  episodes_total: 2200
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.5143906792004904
          entropy_coeff: 0.009999999999999998
          kl: 0.009062073921756085
          policy_loss: 0.0021644574723073415
          total_loss: 0.17499771015275092
          vf_explained_var: 0.9925483465194702
          vf_loss: 0.183899228452217
    num_agent_steps_sampled: 229770
    num_agent_steps_trained: 229770
    num_steps_sampled: 229770
    num_steps_trained: 2297

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,115,3005.3,229770,18.2607,20.99,-0.16,108.06


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 231768
  custom_metrics: {}
  date: 2021-11-13_22-19-58
  done: false
  episode_len_mean: 108.79
  episode_media: {}
  episode_reward_max: 20.98999999999995
  episode_reward_mean: 18.46749999999992
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 18
  episodes_total: 2218
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.5020466781797863
          entropy_coeff: 0.009999999999999998
          kl: 0.012626670340362764
          policy_loss: 0.002509334186712901
          total_loss: 0.3957965216998543
          vf_explained_var: 0.9829823970794678
          vf_loss: 0.40262565275742895
    num_agent_steps_sampled: 231768
    num_agent_steps_trained: 231768
    num_steps_sampled: 231768
    num_steps_trained: 2317

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,116,3027.39,231768,18.4675,20.99,-0.06,108.79


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 233766
  custom_metrics: {}
  date: 2021-11-13_22-20-21
  done: false
  episode_len_mean: 109.44
  episode_media: {}
  episode_reward_max: 20.98999999999995
  episode_reward_mean: 18.50469999999992
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 18
  episodes_total: 2236
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.5083488208906992
          entropy_coeff: 0.009999999999999998
          kl: 0.010085599964105926
          policy_loss: -0.004934478976896831
          total_loss: 0.20735932698562032
          vf_explained_var: 0.9915047883987427
          vf_loss: 0.22283877309943947
    num_agent_steps_sampled: 233766
    num_agent_steps_trained: 233766
    num_steps_sampled: 233766
    num_steps_trained: 23

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,117,3049.79,233766,18.5047,20.99,-0.06,109.44


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 235764
  custom_metrics: {}
  date: 2021-11-13_22-20-44
  done: false
  episode_len_mean: 109.93
  episode_media: {}
  episode_reward_max: 20.98999999999995
  episode_reward_mean: 19.249999999999922
  episode_reward_min: 10.150000000000022
  episodes_this_iter: 20
  episodes_total: 2256
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.5384069045384725
          entropy_coeff: 0.009999999999999998
          kl: 0.013439117001188095
          policy_loss: -0.01774112568015144
          total_loss: 0.20153091260719866
          vf_explained_var: 0.9917030930519104
          vf_loss: 0.2286085070776088
    num_agent_steps_sampled: 235764
    num_agent_steps_trained: 235764
    num_steps_sampled: 235764
    num_steps_trained: 235764

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,118,3072.98,235764,19.25,20.99,10.15,109.93


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 237762
  custom_metrics: {}
  date: 2021-11-13_22-21-05
  done: false
  episode_len_mean: 109.07
  episode_media: {}
  episode_reward_max: 20.98999999999995
  episode_reward_mean: 19.43149999999992
  episode_reward_min: 10.150000000000022
  episodes_this_iter: 18
  episodes_total: 2274
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.524208843140375
          entropy_coeff: 0.009999999999999998
          kl: 0.010151442130571075
          policy_loss: -0.004857290731299491
          total_loss: 0.29001633652064596
          vf_explained_var: 0.9889280200004578
          vf_loss: 0.3055475642638547
    num_agent_steps_sampled: 237762
    num_agent_steps_trained: 237762
    num_steps_sampled: 237762
    num_steps_trained: 237762


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,119,3094.61,237762,19.4315,20.99,10.15,109.07


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 239760
  custom_metrics: {}
  date: 2021-11-13_22-21-27
  done: false
  episode_len_mean: 108.26
  episode_media: {}
  episode_reward_max: 20.98999999999995
  episode_reward_mean: 19.531699999999923
  episode_reward_min: 10.150000000000022
  episodes_this_iter: 18
  episodes_total: 2292
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.4931646233513243
          entropy_coeff: 0.009999999999999998
          kl: 0.014903703654213722
          policy_loss: -0.013004407180207116
          total_loss: 0.27586420301702763
          vf_explained_var: 0.9906978011131287
          vf_loss: 0.2970935911649749
    num_agent_steps_sampled: 239760
    num_agent_steps_trained: 239760
    num_steps_sampled: 239760
    num_steps_trained: 23976

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,120,3116.3,239760,19.5317,20.99,10.15,108.26


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 241758
  custom_metrics: {}
  date: 2021-11-13_22-21-49
  done: false
  episode_len_mean: 108.76
  episode_media: {}
  episode_reward_max: 20.979999999999993
  episode_reward_mean: 19.56229999999992
  episode_reward_min: 10.150000000000022
  episodes_this_iter: 18
  episodes_total: 2310
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.4994326074918112
          entropy_coeff: 0.009999999999999998
          kl: 0.009817733914353505
          policy_loss: -0.01700183575352033
          total_loss: 0.1425496319308877
          vf_explained_var: 0.9944119453430176
          vf_loss: 0.17012781112321784
    num_agent_steps_sampled: 241758
    num_agent_steps_trained: 241758
    num_steps_sampled: 241758
    num_steps_trained: 241758

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,121,3137.9,241758,19.5623,20.98,10.15,108.76


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 243756
  custom_metrics: {}
  date: 2021-11-13_22-22-12
  done: false
  episode_len_mean: 107.25
  episode_media: {}
  episode_reward_max: 20.809999999999935
  episode_reward_mean: 19.632399999999922
  episode_reward_min: 10.610000000000017
  episodes_this_iter: 20
  episodes_total: 2330
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3920848261742365
          entropy_coeff: 0.009999999999999998
          kl: 0.014479340405241958
          policy_loss: -0.02699862601501601
          total_loss: 0.17728773423780997
          vf_explained_var: 0.9929279685020447
          vf_loss: 0.21169150464591527
    num_agent_steps_sampled: 243756
    num_agent_steps_trained: 243756
    num_steps_sampled: 243756
    num_steps_trained: 2437

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,122,3160.96,243756,19.6324,20.81,10.61,107.25


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 245754
  custom_metrics: {}
  date: 2021-11-13_22-22-34
  done: false
  episode_len_mean: 106.73
  episode_media: {}
  episode_reward_max: 20.799999999999947
  episode_reward_mean: 19.790999999999922
  episode_reward_min: 10.610000000000017
  episodes_this_iter: 18
  episodes_total: 2348
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.502209522610619
          entropy_coeff: 0.009999999999999998
          kl: 0.008689813120419896
          policy_loss: -0.025553224164815175
          total_loss: 0.03623930842039131
          vf_explained_var: 0.9973798394203186
          vf_loss: 0.07290421275510675
    num_agent_steps_sampled: 245754
    num_agent_steps_trained: 245754
    num_steps_sampled: 245754
    num_steps_trained: 2457

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,123,3182.95,245754,19.791,20.8,10.61,106.73


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 247752
  custom_metrics: {}
  date: 2021-11-13_22-22-57
  done: false
  episode_len_mean: 105.9
  episode_media: {}
  episode_reward_max: 20.799999999999947
  episode_reward_mean: 20.042499999999926
  episode_reward_min: 12.21000000000002
  episodes_this_iter: 21
  episodes_total: 2369
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.4411815121060325
          entropy_coeff: 0.009999999999999998
          kl: 0.010718750226878973
          policy_loss: 0.006464650730292002
          total_loss: 0.22534533532425052
          vf_explained_var: 0.992734432220459
          vf_loss: 0.2284690615144514
    num_agent_steps_sampled: 247752
    num_agent_steps_trained: 247752
    num_steps_sampled: 247752
    num_steps_trained: 247752
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,124,3206.22,247752,20.0425,20.8,12.21,105.9


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 249750
  custom_metrics: {}
  date: 2021-11-13_22-23-20
  done: false
  episode_len_mean: 104.89
  episode_media: {}
  episode_reward_max: 20.929999999999993
  episode_reward_mean: 19.949599999999922
  episode_reward_min: 12.21000000000002
  episodes_this_iter: 19
  episodes_total: 2388
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.4270416327885218
          entropy_coeff: 0.009999999999999998
          kl: 0.017678336058853335
          policy_loss: -0.004761469257729394
          total_loss: 0.40543773239921954
          vf_explained_var: 0.9851447939872742
          vf_loss: 0.41651436409779957
    num_agent_steps_sampled: 249750
    num_agent_steps_trained: 249750
    num_steps_sampled: 249750
    num_steps_trained: 2497

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,125,3229.31,249750,19.9496,20.93,12.21,104.89


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 251748
  custom_metrics: {}
  date: 2021-11-13_22-23-43
  done: false
  episode_len_mean: 103.69
  episode_media: {}
  episode_reward_max: 20.929999999999993
  episode_reward_mean: 19.88269999999993
  episode_reward_min: 13.100000000000012
  episodes_this_iter: 19
  episodes_total: 2407
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.4407541030929203
          entropy_coeff: 0.009999999999999998
          kl: 0.01090299134072459
          policy_loss: 0.0050749129482678
          total_loss: 0.21391154231414908
          vf_explained_var: 0.9918326139450073
          vf_loss: 0.21833782313125474
    num_agent_steps_sampled: 251748
    num_agent_steps_trained: 251748
    num_steps_sampled: 251748
    num_steps_trained: 251748
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,126,3251.69,251748,19.8827,20.93,13.1,103.69


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 253746
  custom_metrics: {}
  date: 2021-11-13_22-24-07
  done: false
  episode_len_mean: 104.29
  episode_media: {}
  episode_reward_max: 20.929999999999993
  episode_reward_mean: 19.92159999999993
  episode_reward_min: 13.100000000000012
  episodes_this_iter: 19
  episodes_total: 2426
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.4794186137971423
          entropy_coeff: 0.009999999999999998
          kl: 0.011634560688950035
          policy_loss: -0.007013310953265145
          total_loss: 0.254790579305873
          vf_explained_var: 0.9901389479637146
          vf_loss: 0.2713625287725812
    num_agent_steps_sampled: 253746
    num_agent_steps_trained: 253746
    num_steps_sampled: 253746
    num_steps_trained: 253746


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,127,3275.55,253746,19.9216,20.93,13.1,104.29


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 255744
  custom_metrics: {}
  date: 2021-11-13_22-24-30
  done: false
  episode_len_mean: 103.6
  episode_media: {}
  episode_reward_max: 20.929999999999993
  episode_reward_mean: 19.907999999999927
  episode_reward_min: 13.100000000000012
  episodes_this_iter: 20
  episodes_total: 2446
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.443952248777662
          entropy_coeff: 0.009999999999999998
          kl: 0.0070316469196924575
          policy_loss: -0.04285572540192377
          total_loss: 0.06569482772832826
          vf_explained_var: 0.995838463306427
          vf_loss: 0.11982583740637416
    num_agent_steps_sampled: 255744
    num_agent_steps_trained: 255744
    num_steps_sampled: 255744
    num_steps_trained: 255744

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,128,3299.19,255744,19.908,20.93,13.1,103.6


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 257742
  custom_metrics: {}
  date: 2021-11-13_22-24-53
  done: false
  episode_len_mean: 104.86
  episode_media: {}
  episode_reward_max: 20.929999999999993
  episode_reward_mean: 19.970599999999926
  episode_reward_min: 13.100000000000012
  episodes_this_iter: 17
  episodes_total: 2463
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.4773752695038205
          entropy_coeff: 0.009999999999999998
          kl: 0.00893591520736963
          policy_loss: -0.03763970902100915
          total_loss: 0.029747545976369152
          vf_explained_var: 0.9974457025527954
          vf_loss: 0.07813984577854474
    num_agent_steps_sampled: 257742
    num_agent_steps_trained: 257742
    num_steps_sampled: 257742
    num_steps_trained: 2577

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,129,3321.52,257742,19.9706,20.93,13.1,104.86




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 259740
  custom_metrics: {}
  date: 2021-11-13_22-25-31
  done: false
  episode_len_mean: 104.86
  episode_media: {}
  episode_reward_max: 20.86999999999993
  episode_reward_mean: 19.882399999999926
  episode_reward_min: -0.02
  episodes_this_iter: 20
  episodes_total: 2483
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.47362033923467
          entropy_coeff: 0.009999999999999998
          kl: 0.007859472641667111
          policy_loss: 7.50169867560977e-05
          total_loss: 0.5247792783636778
          vf_explained_var: 0.9818253517150879
          vf_loss: 0.5359037049824283
    num_agent_steps_sampled: 259740
    num_agent_steps_trained: 259740
    num_steps_sampled: 259740
    num_steps_trained: 259740
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,130,3359.46,259740,19.8824,20.87,-0.02,104.86




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 261738
  custom_metrics: {}
  date: 2021-11-13_22-26-11
  done: false
  episode_len_mean: 103.42
  episode_media: {}
  episode_reward_max: 20.959999999999937
  episode_reward_mean: 19.833299999999923
  episode_reward_min: -0.02
  episodes_this_iter: 21
  episodes_total: 2504
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.395833402588254
          entropy_coeff: 0.009999999999999998
          kl: 0.008643012064885842
          policy_loss: 0.003463817210424514
          total_loss: 0.7729129164790114
          vf_explained_var: 0.9744617342948914
          vf_loss: 0.7795181223767854
    num_agent_steps_sampled: 261738
    num_agent_steps_trained: 261738
    num_steps_sampled: 261738
    num_steps_trained: 261738
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,131,3399.51,261738,19.8333,20.96,-0.02,103.42




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 263736
  custom_metrics: {}
  date: 2021-11-13_22-26-51
  done: false
  episode_len_mean: 102.08
  episode_media: {}
  episode_reward_max: 20.959999999999937
  episode_reward_mean: 19.680899999999923
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 2524
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.4830123560769217
          entropy_coeff: 0.009999999999999998
          kl: 0.00846247285470421
          policy_loss: 0.003449346391218049
          total_loss: 0.8458544768725655
          vf_explained_var: 0.9703235626220703
          vf_loss: 0.8534271473153716
    num_agent_steps_sampled: 263736
    num_agent_steps_trained: 263736
    num_steps_sampled: 263736
    num_steps_trained: 2637

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,132,3439.65,263736,19.6809,20.96,-0.06,102.08


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 265734
  custom_metrics: {}
  date: 2021-11-13_22-27-14
  done: false
  episode_len_mean: 102.46
  episode_media: {}
  episode_reward_max: 20.959999999999937
  episode_reward_mean: 19.61679999999992
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 18
  episodes_total: 2542
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.4718613721075513
          entropy_coeff: 0.009999999999999998
          kl: 0.007181482541403291
          policy_loss: -0.06267304789452326
          total_loss: 0.039278995068300335
          vf_explained_var: 0.9967717528343201
          vf_loss: 0.1134389881222021
    num_agent_steps_sampled: 265734
    num_agent_steps_trained: 265734
    num_steps_sampled: 265734
    num_steps_trained: 26

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,133,3462.94,265734,19.6168,20.96,-0.06,102.46


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 267732
  custom_metrics: {}
  date: 2021-11-13_22-27-37
  done: false
  episode_len_mean: 102.65
  episode_media: {}
  episode_reward_max: 20.959999999999937
  episode_reward_mean: 19.564499999999924
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 2562
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.4297635084106808
          entropy_coeff: 0.009999999999999998
          kl: 0.010319891628256833
          policy_loss: -0.047169186600617
          total_loss: 0.07249586558235543
          vf_explained_var: 0.9957345128059387
          vf_loss: 0.1293187372209061
    num_agent_steps_sampled: 267732
    num_agent_steps_trained: 267732
    num_steps_sampled: 267732
    num_steps_trained: 2677

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,134,3485.31,267732,19.5645,20.96,-0.06,102.65


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 269730
  custom_metrics: {}
  date: 2021-11-13_22-27-59
  done: false
  episode_len_mean: 103.56
  episode_media: {}
  episode_reward_max: 20.959999999999937
  episode_reward_mean: 19.69879999999992
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 18
  episodes_total: 2580
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.4687679767608643
          entropy_coeff: 0.009999999999999998
          kl: 0.011881254250337713
          policy_loss: -0.006997346871399454
          total_loss: 0.1187701811393102
          vf_explained_var: 0.9954066872596741
          vf_loss: 0.13510864602313155
    num_agent_steps_sampled: 269730
    num_agent_steps_trained: 269730
    num_steps_sampled: 269730
    num_steps_trained: 26

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,135,3507.82,269730,19.6988,20.96,-0.06,103.56


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 271728
  custom_metrics: {}
  date: 2021-11-13_22-28-21
  done: false
  episode_len_mean: 105.11
  episode_media: {}
  episode_reward_max: 20.969999999999924
  episode_reward_mean: 19.736499999999914
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 18
  episodes_total: 2598
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.458115824063619
          entropy_coeff: 0.009999999999999998
          kl: 0.009429831215961807
          policy_loss: -0.00421953478029796
          total_loss: 0.12218629313366754
          vf_explained_var: 0.9956430792808533
          vf_loss: 0.13674356234925134
    num_agent_steps_sampled: 271728
    num_agent_steps_trained: 271728
    num_steps_sampled: 271728
    num_steps_trained: 27

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,136,3529.78,271728,19.7365,20.97,-0.06,105.11


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 273726
  custom_metrics: {}
  date: 2021-11-13_22-28-43
  done: false
  episode_len_mean: 108.08
  episode_media: {}
  episode_reward_max: 20.969999999999924
  episode_reward_mean: 20.043199999999914
  episode_reward_min: 16.219999999999914
  episodes_this_iter: 18
  episodes_total: 2616
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.428591177577064
          entropy_coeff: 0.009999999999999998
          kl: 0.007642887988449502
          policy_loss: -0.0244752447165194
          total_loss: 0.0652734353162703
          vf_explained_var: 0.9968636631965637
          vf_loss: 0.10059529095888138
    num_agent_steps_sampled: 273726
    num_agent_steps_trained: 273726
    num_steps_sampled: 273726
    num_steps_trained: 273726


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,137,3551.68,273726,20.0432,20.97,16.22,108.08


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 275724
  custom_metrics: {}
  date: 2021-11-13_22-29-06
  done: false
  episode_len_mean: 107.98
  episode_media: {}
  episode_reward_max: 20.969999999999924
  episode_reward_mean: 19.95609999999991
  episode_reward_min: 13.190000000000012
  episodes_this_iter: 18
  episodes_total: 2634
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.4174760177021934
          entropy_coeff: 0.009999999999999998
          kl: 0.019509521419876497
          policy_loss: -0.020533596440440133
          total_loss: 0.2670450445796762
          vf_explained_var: 0.9903086423873901
          vf_loss: 0.2929741162984144
    num_agent_steps_sampled: 275724
    num_agent_steps_trained: 275724
    num_steps_sampled: 275724
    num_steps_trained: 275724

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,138,3574.42,275724,19.9561,20.97,13.19,107.98


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 277722
  custom_metrics: {}
  date: 2021-11-13_22-29-29
  done: false
  episode_len_mean: 108.15
  episode_media: {}
  episode_reward_max: 20.969999999999924
  episode_reward_mean: 19.944599999999912
  episode_reward_min: 13.190000000000012
  episodes_this_iter: 19
  episodes_total: 2653
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.4204297394979568
          entropy_coeff: 0.009999999999999998
          kl: 0.009581412769662167
          policy_loss: -0.009748480823777972
          total_loss: 0.13266465592065027
          vf_explained_var: 0.9954839944839478
          vf_loss: 0.15230579778906844
    num_agent_steps_sampled: 277722
    num_agent_steps_trained: 277722
    num_steps_sampled: 277722
    num_steps_trained: 277

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,139,3597.35,277722,19.9446,20.97,13.19,108.15


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 279720
  custom_metrics: {}
  date: 2021-11-13_22-29-51
  done: false
  episode_len_mean: 108.46
  episode_media: {}
  episode_reward_max: 20.969999999999924
  episode_reward_mean: 19.88799999999991
  episode_reward_min: 13.060000000000013
  episodes_this_iter: 19
  episodes_total: 2672
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.4451126439230784
          entropy_coeff: 0.009999999999999998
          kl: 0.01717443682989909
          policy_loss: -0.0037448676480423835
          total_loss: 0.18443434120466312
          vf_explained_var: 0.9935789704322815
          vf_loss: 0.19490184140879482
    num_agent_steps_sampled: 279720
    num_agent_steps_trained: 279720
    num_steps_sampled: 279720
    num_steps_trained: 2797

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,140,3619.6,279720,19.888,20.97,13.06,108.46


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 281718
  custom_metrics: {}
  date: 2021-11-13_22-30-14
  done: false
  episode_len_mean: 107.58
  episode_media: {}
  episode_reward_max: 20.969999999999924
  episode_reward_mean: 19.888299999999916
  episode_reward_min: 13.060000000000013
  episodes_this_iter: 18
  episodes_total: 2690
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.5144958070346288
          entropy_coeff: 0.009999999999999998
          kl: 0.007229146325086325
          policy_loss: -0.05451298760516303
          total_loss: 0.10073054425773166
          vf_explained_var: 0.9947423934936523
          vf_loss: 0.1671353700437716
    num_agent_steps_sampled: 281718
    num_agent_steps_trained: 281718
    num_steps_sampled: 281718
    num_steps_trained: 28171

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,141,3641.99,281718,19.8883,20.97,13.06,107.58


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 283716
  custom_metrics: {}
  date: 2021-11-13_22-30-37
  done: false
  episode_len_mean: 106.33
  episode_media: {}
  episode_reward_max: 20.91999999999992
  episode_reward_mean: 19.742199999999915
  episode_reward_min: 10.75000000000002
  episodes_this_iter: 20
  episodes_total: 2710
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3560237072762988
          entropy_coeff: 0.009999999999999998
          kl: 0.015595962247373973
          policy_loss: -0.00591961858528001
          total_loss: 0.4426724749129443
          vf_explained_var: 0.9834702014923096
          vf_loss: 0.4551341507229067
    num_agent_steps_sampled: 283716
    num_agent_steps_trained: 283716
    num_steps_sampled: 283716
    num_steps_trained: 283716
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,142,3664.91,283716,19.7422,20.92,10.75,106.33


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 285714
  custom_metrics: {}
  date: 2021-11-13_22-31-00
  done: false
  episode_len_mean: 105.22
  episode_media: {}
  episode_reward_max: 20.93999999999992
  episode_reward_mean: 19.801499999999916
  episode_reward_min: 10.75000000000002
  episodes_this_iter: 19
  episodes_total: 2729
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.4233103718076434
          entropy_coeff: 0.009999999999999998
          kl: 0.009407501801457883
          policy_loss: 0.005880022510176613
          total_loss: 0.18458108408820062
          vf_explained_var: 0.9934036731719971
          vf_loss: 0.18870078875195412
    num_agent_steps_sampled: 285714
    num_agent_steps_trained: 285714
    num_steps_sampled: 285714
    num_steps_trained: 285714

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,143,3688.57,285714,19.8015,20.94,10.75,105.22


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 287712
  custom_metrics: {}
  date: 2021-11-13_22-31-23
  done: false
  episode_len_mean: 105.13
  episode_media: {}
  episode_reward_max: 20.93999999999992
  episode_reward_mean: 19.92969999999992
  episode_reward_min: 10.75000000000002
  episodes_this_iter: 20
  episodes_total: 2749
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.4499386900947207
          entropy_coeff: 0.009999999999999998
          kl: 0.009329008765877847
          policy_loss: -0.014236152580096608
          total_loss: 0.12212334958215555
          vf_explained_var: 0.9952176213264465
          vf_loss: 0.14666083456859702
    num_agent_steps_sampled: 287712
    num_agent_steps_trained: 287712
    num_steps_sampled: 287712
    num_steps_trained: 287712

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,144,3711.06,287712,19.9297,20.94,10.75,105.13


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 289710
  custom_metrics: {}
  date: 2021-11-13_22-31-46
  done: false
  episode_len_mean: 104.28
  episode_media: {}
  episode_reward_max: 20.93999999999992
  episode_reward_mean: 20.01589999999992
  episode_reward_min: 10.75000000000002
  episodes_this_iter: 19
  episodes_total: 2768
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.442285514445532
          entropy_coeff: 0.009999999999999998
          kl: 0.008135640927465552
          policy_loss: -0.0516893308609724
          total_loss: 0.10572472480790955
          vf_explained_var: 0.9942545294761658
          vf_loss: 0.16817587186538038
    num_agent_steps_sampled: 289710
    num_agent_steps_trained: 289710
    num_steps_sampled: 289710
    num_steps_trained: 289710
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,145,3734.08,289710,20.0159,20.94,10.75,104.28


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 291708
  custom_metrics: {}
  date: 2021-11-13_22-32-11
  done: false
  episode_len_mean: 102.87
  episode_media: {}
  episode_reward_max: 20.989999999999924
  episode_reward_mean: 19.952299999999923
  episode_reward_min: 10.75000000000002
  episodes_this_iter: 20
  episodes_total: 2788
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3230297258922032
          entropy_coeff: 0.009999999999999998
          kl: 0.011768585003536006
          policy_loss: -0.011525622329541615
          total_loss: 0.11277125593097437
          vf_explained_var: 0.9958009123802185
          vf_loss: 0.13223131204999627
    num_agent_steps_sampled: 291708
    num_agent_steps_trained: 291708
    num_steps_sampled: 291708
    num_steps_trained: 2917

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,146,3759.15,291708,19.9523,20.99,10.75,102.87


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 293706
  custom_metrics: {}
  date: 2021-11-13_22-32-35
  done: false
  episode_len_mean: 102.13
  episode_media: {}
  episode_reward_max: 20.989999999999924
  episode_reward_mean: 20.141399999999916
  episode_reward_min: 16.569999999999986
  episodes_this_iter: 21
  episodes_total: 2809
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.505369454338437
          entropy_coeff: 0.009999999999999998
          kl: 0.010384160465186478
          policy_loss: -0.013468626443119277
          total_loss: 0.16391007987604964
          vf_explained_var: 0.9938159584999084
          vf_loss: 0.18775952701412496
    num_agent_steps_sampled: 293706
    num_agent_steps_trained: 293706
    num_steps_sampled: 293706
    num_steps_trained: 2937

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,147,3783.16,293706,20.1414,20.99,16.57,102.13


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 295704
  custom_metrics: {}
  date: 2021-11-13_22-32-58
  done: false
  episode_len_mean: 102.21
  episode_media: {}
  episode_reward_max: 20.989999999999924
  episode_reward_mean: 20.13569999999992
  episode_reward_min: 17.059999999999956
  episodes_this_iter: 18
  episodes_total: 2827
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3822459044910613
          entropy_coeff: 0.009999999999999998
          kl: 0.008536067691799433
          policy_loss: -0.006016666335718972
          total_loss: 0.14345451944640705
          vf_explained_var: 0.9953513145446777
          vf_loss: 0.1594524152754318
    num_agent_steps_sampled: 295704
    num_agent_steps_trained: 295704
    num_steps_sampled: 295704
    num_steps_trained: 29570

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,148,3806.65,295704,20.1357,20.99,17.06,102.21




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 297702
  custom_metrics: {}
  date: 2021-11-13_22-33-37
  done: false
  episode_len_mean: 101.02
  episode_media: {}
  episode_reward_max: 20.989999999999924
  episode_reward_mean: 19.944699999999923
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 22
  episodes_total: 2849
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.5028460911342076
          entropy_coeff: 0.009999999999999998
          kl: 0.008926038346616696
          policy_loss: -0.014178711417618962
          total_loss: 0.7960593785691474
          vf_explained_var: 0.974318265914917
          vf_loss: 0.8212498385991369
    num_agent_steps_sampled: 297702
    num_agent_steps_trained: 297702
    num_steps_sampled: 297702
    num_steps_trained: 297

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,149,3845.03,297702,19.9447,20.99,-0.06,101.02




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 299700
  custom_metrics: {}
  date: 2021-11-13_22-34-29
  done: false
  episode_len_mean: 98.69
  episode_media: {}
  episode_reward_max: 20.989999999999924
  episode_reward_mean: 19.529499999999924
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 2869
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3619683418955122
          entropy_coeff: 0.009999999999999998
          kl: 0.00870682856440214
          policy_loss: 0.006282333674884978
          total_loss: 1.4244752591675414
          vf_explained_var: 0.952521026134491
          vf_loss: 1.4278945510586103
    num_agent_steps_sampled: 299700
    num_agent_steps_trained: 299700
    num_steps_sampled: 299700
    num_steps_trained: 299700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,150,3897.25,299700,19.5295,20.99,-0.06,98.69


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 301698
  custom_metrics: {}
  date: 2021-11-13_22-34-54
  done: false
  episode_len_mean: 98.1
  episode_media: {}
  episode_reward_max: 20.969999999999928
  episode_reward_mean: 19.469499999999925
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 2890
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3285671722321284
          entropy_coeff: 0.009999999999999998
          kl: 0.03345914045605174
          policy_loss: -0.005308990819113595
          total_loss: 0.6687940300752719
          vf_explained_var: 0.9764987826347351
          vf_loss: 0.6723320808793817
    num_agent_steps_sampled: 301698
    num_agent_steps_trained: 301698
    num_steps_sampled: 301698
    num_steps_trained: 30169

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,151,3922.38,301698,19.4695,20.97,-0.06,98.1


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 303696
  custom_metrics: {}
  date: 2021-11-13_22-35-17
  done: false
  episode_len_mean: 98.78
  episode_media: {}
  episode_reward_max: 20.969999999999928
  episode_reward_mean: 19.502099999999924
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 2909
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.3896647674696787
          entropy_coeff: 0.009999999999999998
          kl: 0.006074903508326303
          policy_loss: -0.013153910317591258
          total_loss: 0.08448522258549929
          vf_explained_var: 0.9965959191322327
          vf_loss: 0.10743522047109547
    num_agent_steps_sampled: 303696
    num_agent_steps_trained: 303696
    num_steps_sampled: 303696
    num_steps_trained: 303696
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,152,3945.12,303696,19.5021,20.97,-0.06,98.78


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 305694
  custom_metrics: {}
  date: 2021-11-13_22-35-41
  done: false
  episode_len_mean: 98.82
  episode_media: {}
  episode_reward_max: 20.969999999999928
  episode_reward_mean: 19.57169999999992
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 2928
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.398242525827317
          entropy_coeff: 0.009999999999999998
          kl: 0.0052019791631767055
          policy_loss: -0.0250583733565041
          total_loss: 0.08100513697025322
          vf_explained_var: 0.9964128136634827
          vf_loss: 0.11653459987470081
    num_agent_steps_sampled: 305694
    num_agent_steps_trained: 305694
    num_steps_sampled: 305694
    num_steps_trained: 305694
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,153,3969.38,305694,19.5717,20.97,-0.06,98.82


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 307692
  custom_metrics: {}
  date: 2021-11-13_22-36-05
  done: false
  episode_len_mean: 99.77
  episode_media: {}
  episode_reward_max: 21.00999999999993
  episode_reward_mean: 19.741899999999923
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 2948
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.374011905420394
          entropy_coeff: 0.009999999999999998
          kl: 0.008559342802883311
          policy_loss: -0.0081199382742246
          total_loss: 0.11611925035360314
          vf_explained_var: 0.996049165725708
          vf_loss: 0.13220175014187893
    num_agent_steps_sampled: 307692
    num_agent_steps_trained: 307692
    num_steps_sampled: 307692
    num_steps_trained: 307692
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,154,3992.8,307692,19.7419,21.01,-0.06,99.77


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 309690
  custom_metrics: {}
  date: 2021-11-13_22-36-27
  done: false
  episode_len_mean: 102.05
  episode_media: {}
  episode_reward_max: 21.00999999999993
  episode_reward_mean: 20.20779999999992
  episode_reward_min: 3.47000000000001
  episodes_this_iter: 18
  episodes_total: 2966
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.4262542900584994
          entropy_coeff: 0.009999999999999998
          kl: 0.005163459531689584
          policy_loss: -0.010697519539722375
          total_loss: 0.009651535155162925
          vf_explained_var: 0.9989943504333496
          vf_loss: 0.031126263538109405
    num_agent_steps_sampled: 309690
    num_agent_steps_trained: 309690
    num_steps_sampled: 309690
    num_steps_trained: 309690
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,155,4014.99,309690,20.2078,21.01,3.47,102.05


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 311688
  custom_metrics: {}
  date: 2021-11-13_22-36-50
  done: false
  episode_len_mean: 102.52
  episode_media: {}
  episode_reward_max: 21.00999999999993
  episode_reward_mean: 20.45749999999992
  episode_reward_min: 16.74999999999993
  episodes_this_iter: 21
  episodes_total: 2987
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.3589186872754777
          entropy_coeff: 0.009999999999999998
          kl: 0.007797797041142543
          policy_loss: -0.01511538422533444
          total_loss: 0.062067973032771125
          vf_explained_var: 0.9975245594978333
          vf_loss: 0.08550903262304409
    num_agent_steps_sampled: 311688
    num_agent_steps_trained: 311688
    num_steps_sampled: 311688
    num_steps_trained: 311688
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,156,4038.22,311688,20.4575,21.01,16.75,102.52


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 313686
  custom_metrics: {}
  date: 2021-11-13_22-37-14
  done: false
  episode_len_mean: 101.68
  episode_media: {}
  episode_reward_max: 21.069999999999983
  episode_reward_mean: 20.44689999999992
  episode_reward_min: 16.74999999999993
  episodes_this_iter: 20
  episodes_total: 3007
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.3823383785429455
          entropy_coeff: 0.009999999999999998
          kl: 0.00871543947986223
          policy_loss: -0.0020451033044429053
          total_loss: 0.09031054052923407
          vf_explained_var: 0.9968751668930054
          vf_loss: 0.10029610710307246
    num_agent_steps_sampled: 313686
    num_agent_steps_trained: 313686
    num_steps_sampled: 313686
    num_steps_trained: 313686
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,157,4062.22,313686,20.4469,21.07,16.75,101.68


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 315684
  custom_metrics: {}
  date: 2021-11-13_22-37-38
  done: false
  episode_len_mean: 101.2
  episode_media: {}
  episode_reward_max: 21.069999999999983
  episode_reward_mean: 20.465699999999927
  episode_reward_min: 16.74999999999993
  episodes_this_iter: 20
  episodes_total: 3027
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.3694803504716782
          entropy_coeff: 0.009999999999999998
          kl: 0.00676999676585796
          policy_loss: -0.015287603198417595
          total_loss: 0.024377276934683324
          vf_explained_var: 0.9985120296478271
          vf_loss: 0.04878993599808642
    num_agent_steps_sampled: 315684
    num_agent_steps_trained: 315684
    num_steps_sampled: 315684
    num_steps_trained: 315684
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,158,4085.86,315684,20.4657,21.07,16.75,101.2


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 317682
  custom_metrics: {}
  date: 2021-11-13_22-38-01
  done: false
  episode_len_mean: 101.02
  episode_media: {}
  episode_reward_max: 21.069999999999983
  episode_reward_mean: 20.557699999999922
  episode_reward_min: 18.499999999999922
  episodes_this_iter: 19
  episodes_total: 3046
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.675
          cur_lr: 5.000000000000001e-05
          entropy: 1.316752549012502
          entropy_coeff: 0.009999999999999998
          kl: 0.00495821711327803
          policy_loss: -0.0012207310114588055
          total_loss: 0.03466607511398338
          vf_explained_var: 0.9987621307373047
          vf_loss: 0.04570753654198987
    num_agent_steps_sampled: 317682
    num_agent_steps_trained: 317682
    num_steps_sampled: 317682
    num_steps_trained: 317682
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,159,4108.93,317682,20.5577,21.07,18.5,101.02


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 319680
  custom_metrics: {}
  date: 2021-11-13_22-38-24
  done: false
  episode_len_mean: 100.79
  episode_media: {}
  episode_reward_max: 21.069999999999983
  episode_reward_mean: 20.580899999999918
  episode_reward_min: 18.499999999999922
  episodes_this_iter: 20
  episodes_total: 3066
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3375
          cur_lr: 5.000000000000001e-05
          entropy: 1.3981547355651855
          entropy_coeff: 0.009999999999999998
          kl: 0.008466217819555073
          policy_loss: -0.028122225634398916
          total_loss: 0.02580568204146056
          vf_explained_var: 0.997929036617279
          vf_loss: 0.06505210508281986
    num_agent_steps_sampled: 319680
    num_agent_steps_trained: 319680
    num_steps_sampled: 319680
    num_steps_trained: 319680
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,160,4131.95,319680,20.5809,21.07,18.5,100.79


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 321678
  custom_metrics: {}
  date: 2021-11-13_22-38-47
  done: false
  episode_len_mean: 101.14
  episode_media: {}
  episode_reward_max: 21.069999999999983
  episode_reward_mean: 20.594799999999918
  episode_reward_min: 18.499999999999922
  episodes_this_iter: 20
  episodes_total: 3086
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3375
          cur_lr: 5.000000000000001e-05
          entropy: 1.348458542710259
          entropy_coeff: 0.009999999999999998
          kl: 0.00869776568654809
          policy_loss: -0.020856798351520583
          total_loss: 0.01681611922880014
          vf_explained_var: 0.9985589981079102
          vf_loss: 0.048222006400603624
    num_agent_steps_sampled: 321678
    num_agent_steps_trained: 321678
    num_steps_sampled: 321678
    num_steps_trained: 321678
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,161,4154.48,321678,20.5948,21.07,18.5,101.14


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 323676
  custom_metrics: {}
  date: 2021-11-13_22-39-10
  done: false
  episode_len_mean: 101.98
  episode_media: {}
  episode_reward_max: 21.029999999999934
  episode_reward_mean: 20.63189999999992
  episode_reward_min: 18.349999999999902
  episodes_this_iter: 19
  episodes_total: 3105
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3375
          cur_lr: 5.000000000000001e-05
          entropy: 1.3678115753900437
          entropy_coeff: 0.009999999999999998
          kl: 0.014476133959284388
          policy_loss: -0.005466752898480211
          total_loss: 0.054629286245575975
          vf_explained_var: 0.9978864192962646
          vf_loss: 0.06888845897324028
    num_agent_steps_sampled: 323676
    num_agent_steps_trained: 323676
    num_steps_sampled: 323676
    num_steps_trained: 323676
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,162,4177.6,323676,20.6319,21.03,18.35,101.98


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 325674
  custom_metrics: {}
  date: 2021-11-13_22-39-33
  done: false
  episode_len_mean: 101.7
  episode_media: {}
  episode_reward_max: 21.029999999999934
  episode_reward_mean: 20.513499999999922
  episode_reward_min: 13.150000000000013
  episodes_this_iter: 20
  episodes_total: 3125
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3375
          cur_lr: 5.000000000000001e-05
          entropy: 1.2693477528435844
          entropy_coeff: 0.009999999999999998
          kl: 0.02182599377555639
          policy_loss: 0.010055542187321754
          total_loss: 0.1934844488189334
          vf_explained_var: 0.9946568012237549
          vf_loss: 0.18875611028855754
    num_agent_steps_sampled: 325674
    num_agent_steps_trained: 325674
    num_steps_sampled: 325674
    num_steps_trained: 325674
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,163,4201.12,325674,20.5135,21.03,13.15,101.7


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 327672
  custom_metrics: {}
  date: 2021-11-13_22-39-57
  done: false
  episode_len_mean: 101.77
  episode_media: {}
  episode_reward_max: 21.029999999999934
  episode_reward_mean: 20.457499999999925
  episode_reward_min: 13.150000000000013
  episodes_this_iter: 20
  episodes_total: 3145
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.3257936103003365
          entropy_coeff: 0.009999999999999998
          kl: 0.008583491780898829
          policy_loss: -0.05401546515169598
          total_loss: 0.040824494936636514
          vf_explained_var: 0.9969787001609802
          vf_loss: 0.10375250367713826
    num_agent_steps_sampled: 327672
    num_agent_steps_trained: 327672
    num_steps_sampled: 327672
    num_steps_trained: 327672
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,164,4225.07,327672,20.4575,21.03,13.15,101.77


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 329670
  custom_metrics: {}
  date: 2021-11-13_22-40-20
  done: false
  episode_len_mean: 101.1
  episode_media: {}
  episode_reward_max: 21.029999999999934
  episode_reward_mean: 20.41719999999992
  episode_reward_min: 13.150000000000013
  episodes_this_iter: 19
  episodes_total: 3164
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.2810634601683843
          entropy_coeff: 0.009999999999999998
          kl: 0.008316951647038552
          policy_loss: -0.003487581404901686
          total_loss: 0.07516046153115374
          vf_explained_var: 0.9973423480987549
          vf_loss: 0.08724821879572811
    num_agent_steps_sampled: 329670
    num_agent_steps_trained: 329670
    num_steps_sampled: 329670
    num_steps_trained: 329670
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,165,4248.05,329670,20.4172,21.03,13.15,101.1


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 331668
  custom_metrics: {}
  date: 2021-11-13_22-40-44
  done: false
  episode_len_mean: 100.67
  episode_media: {}
  episode_reward_max: 21.05999999999994
  episode_reward_mean: 20.362499999999923
  episode_reward_min: 13.150000000000013
  episodes_this_iter: 21
  episodes_total: 3185
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.2186430198805673
          entropy_coeff: 0.009999999999999998
          kl: 0.008442579907385112
          policy_loss: -0.14789926870947792
          total_loss: -0.09925967823891413
          vf_explained_var: 0.9984161257743835
          vf_loss: 0.05655196376056189
    num_agent_steps_sampled: 331668
    num_agent_steps_trained: 331668
    num_steps_sampled: 331668
    num_steps_trained: 331668
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,166,4272.12,331668,20.3625,21.06,13.15,100.67




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 333666
  custom_metrics: {}
  date: 2021-11-13_22-41-24
  done: false
  episode_len_mean: 99.01
  episode_media: {}
  episode_reward_max: 21.05999999999994
  episode_reward_mean: 20.177699999999923
  episode_reward_min: 1.95
  episodes_this_iter: 21
  episodes_total: 3206
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.287534487247467
          entropy_coeff: 0.009999999999999998
          kl: 0.008765908742313125
          policy_loss: -0.026357426760452135
          total_loss: 0.6687409476842732
          vf_explained_var: 0.979040265083313
          vf_loss: 0.7035359886075769
    num_agent_steps_sampled: 333666
    num_agent_steps_trained: 333666
    num_steps_sampled: 333666
    num_steps_trained: 333666
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,167,4311.21,333666,20.1777,21.06,1.95,99.01




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 335664
  custom_metrics: {}
  date: 2021-11-13_22-42-18
  done: false
  episode_len_mean: 96.68
  episode_media: {}
  episode_reward_max: 21.05999999999994
  episode_reward_mean: 19.887799999999924
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 22
  episodes_total: 3228
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.2947011067753746
          entropy_coeff: 0.009999999999999998
          kl: 0.008653826586994368
          policy_loss: 0.003493966277511347
          total_loss: 1.3600781259792192
          vf_explained_var: 0.9583820104598999
          vf_loss: 1.3651501762015479
    num_agent_steps_sampled: 335664
    num_agent_steps_trained: 335664
    num_steps_sampled: 335664
    num_steps_trained: 335664
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,168,4365.24,335664,19.8878,21.06,-0.06,96.68


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 337662
  custom_metrics: {}
  date: 2021-11-13_22-42-43
  done: false
  episode_len_mean: 96.77
  episode_media: {}
  episode_reward_max: 21.05999999999994
  episode_reward_mean: 19.97259999999993
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 3248
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.339213095960163
          entropy_coeff: 0.009999999999999998
          kl: 0.0065969255812754184
          policy_loss: -0.01142680715946924
          total_loss: 0.04748339792270036
          vf_explained_var: 0.9980356693267822
          vf_loss: 0.06896264212472099
    num_agent_steps_sampled: 337662
    num_agent_steps_trained: 337662
    num_steps_sampled: 337662
    num_steps_trained: 337662
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,169,4390.59,337662,19.9726,21.06,-0.06,96.77


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 339660
  custom_metrics: {}
  date: 2021-11-13_22-43-07
  done: false
  episode_len_mean: 96.49
  episode_media: {}
  episode_reward_max: 21.099999999999945
  episode_reward_mean: 19.969199999999926
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 3268
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.3137082542691911
          entropy_coeff: 0.009999999999999998
          kl: 0.00870614978758167
          policy_loss: -0.02256171933951832
          total_loss: 0.024108464341788066
          vf_explained_var: 0.9984856247901917
          vf_loss: 0.05539977884452258
    num_agent_steps_sampled: 339660
    num_agent_steps_trained: 339660
    num_steps_sampled: 339660
    num_steps_trained: 339660
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,170,4414.36,339660,19.9692,21.1,-0.06,96.49


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 341658
  custom_metrics: {}
  date: 2021-11-13_22-43-30
  done: false
  episode_len_mean: 97.64
  episode_media: {}
  episode_reward_max: 21.099999999999945
  episode_reward_mean: 20.143999999999927
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 3288
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.3143966260410491
          entropy_coeff: 0.009999999999999998
          kl: 0.006489989078548396
          policy_loss: -0.0633042712208061
          total_loss: -0.015842496257807526
          vf_explained_var: 0.9982614517211914
          vf_loss: 0.057320183963470515
    num_agent_steps_sampled: 341658
    num_agent_steps_trained: 341658
    num_steps_sampled: 341658
    num_steps_trained: 341658
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,171,4437.87,341658,20.144,21.1,-0.06,97.64


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 343656
  custom_metrics: {}
  date: 2021-11-13_22-43-55
  done: false
  episode_len_mean: 97.29
  episode_media: {}
  episode_reward_max: 21.099999999999945
  episode_reward_mean: 20.21689999999993
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 3309
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.2666816081319536
          entropy_coeff: 0.009999999999999998
          kl: 0.008040297830406152
          policy_loss: -0.012806447205089387
          total_loss: 0.003990137523838452
          vf_explained_var: 0.999180257320404
          vf_loss: 0.025393001531206424
    num_agent_steps_sampled: 343656
    num_agent_steps_trained: 343656
    num_steps_sampled: 343656
    num_steps_trained: 343656
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,172,4462.24,343656,20.2169,21.1,-0.06,97.29


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 345654
  custom_metrics: {}
  date: 2021-11-13_22-44-19
  done: false
  episode_len_mean: 98.67
  episode_media: {}
  episode_reward_max: 21.099999999999945
  episode_reward_mean: 20.54109999999993
  episode_reward_min: 16.65999999999994
  episodes_this_iter: 21
  episodes_total: 3330
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.2245909940628779
          entropy_coeff: 0.009999999999999998
          kl: 0.009193245711011216
          policy_loss: -0.08317857666739396
          total_loss: 0.024941209100541616
          vf_explained_var: 0.9962674379348755
          vf_loss: 0.1157116151370463
    num_agent_steps_sampled: 345654
    num_agent_steps_trained: 345654
    num_steps_sampled: 345654
    num_steps_trained: 345654
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,173,4486.61,345654,20.5411,21.1,16.66,98.67


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 347652
  custom_metrics: {}
  date: 2021-11-13_22-44-43
  done: false
  episode_len_mean: 97.39
  episode_media: {}
  episode_reward_max: 21.099999999999945
  episode_reward_mean: 20.560499999999923
  episode_reward_min: 16.65999999999994
  episodes_this_iter: 21
  episodes_total: 3351
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.3116822396005903
          entropy_coeff: 0.009999999999999998
          kl: 0.008260089477838815
          policy_loss: 0.005303994796815373
          total_loss: 0.06130630191238154
          vf_explained_var: 0.9982734322547913
          vf_loss: 0.06493746124740157
    num_agent_steps_sampled: 347652
    num_agent_steps_trained: 347652
    num_steps_sampled: 347652
    num_steps_trained: 347652
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,174,4510.7,347652,20.5605,21.1,16.66,97.39


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 349650
  custom_metrics: {}
  date: 2021-11-13_22-45-08
  done: false
  episode_len_mean: 96.85
  episode_media: {}
  episode_reward_max: 21.099999999999945
  episode_reward_mean: 20.55539999999992
  episode_reward_min: 16.65999999999994
  episodes_this_iter: 21
  episodes_total: 3372
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.311709139460609
          entropy_coeff: 0.009999999999999998
          kl: 0.011680251637837636
          policy_loss: -0.021190035431867555
          total_loss: 0.06707346308532924
          vf_explained_var: 0.9972570538520813
          vf_loss: 0.09546746115776755
    num_agent_steps_sampled: 349650
    num_agent_steps_trained: 349650
    num_steps_sampled: 349650
    num_steps_trained: 349650
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,175,4534.91,349650,20.5554,21.1,16.66,96.85


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 351648
  custom_metrics: {}
  date: 2021-11-13_22-45-31
  done: false
  episode_len_mean: 96.73
  episode_media: {}
  episode_reward_max: 21.099999999999945
  episode_reward_mean: 20.649699999999925
  episode_reward_min: 16.65999999999994
  episodes_this_iter: 20
  episodes_total: 3392
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.3645168452035814
          entropy_coeff: 0.009999999999999998
          kl: 0.0074949752683688906
          policy_loss: -0.011281460452647437
          total_loss: 0.017313913433324724
          vf_explained_var: 0.9989679455757141
          vf_loss: 0.0384462092737002
    num_agent_steps_sampled: 351648
    num_agent_steps_trained: 351648
    num_steps_sampled: 351648
    num_steps_trained: 351648
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,176,4558.43,351648,20.6497,21.1,16.66,96.73


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 353646
  custom_metrics: {}
  date: 2021-11-13_22-45-55
  done: false
  episode_len_mean: 96.92
  episode_media: {}
  episode_reward_max: 21.099999999999945
  episode_reward_mean: 20.615099999999924
  episode_reward_min: 16.65999999999994
  episodes_this_iter: 20
  episodes_total: 3412
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.276183269137428
          entropy_coeff: 0.009999999999999998
          kl: 0.007163038155893254
          policy_loss: -0.07573601016331287
          total_loss: -0.05222421790517512
          vf_explained_var: 0.9990706443786621
          vf_loss: 0.03264733590407386
    num_agent_steps_sampled: 353646
    num_agent_steps_trained: 353646
    num_steps_sampled: 353646
    num_steps_trained: 353646
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,177,4581.78,353646,20.6151,21.1,16.66,96.92


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 355644
  custom_metrics: {}
  date: 2021-11-13_22-46-17
  done: false
  episode_len_mean: 96.93
  episode_media: {}
  episode_reward_max: 21.099999999999945
  episode_reward_mean: 20.746599999999926
  episode_reward_min: 18.629999999999924
  episodes_this_iter: 21
  episodes_total: 3433
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.2962045612789335
          entropy_coeff: 0.009999999999999998
          kl: 0.012391368320402486
          policy_loss: -0.019479838900622867
          total_loss: -0.013828646019101143
          vf_explained_var: 0.9996328949928284
          vf_loss: 0.012340107032408317
    num_agent_steps_sampled: 355644
    num_agent_steps_trained: 355644
    num_steps_sampled: 355644
    num_steps_trained: 355644
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,178,4604.61,355644,20.7466,21.1,18.63,96.93


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 357642
  custom_metrics: {}
  date: 2021-11-13_22-46-42
  done: false
  episode_len_mean: 96.77
  episode_media: {}
  episode_reward_max: 21.049999999999937
  episode_reward_mean: 20.75809999999992
  episode_reward_min: 18.77999999999992
  episodes_this_iter: 21
  episodes_total: 3454
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.2112001560983203
          entropy_coeff: 0.009999999999999998
          kl: 0.007896384195369895
          policy_loss: -0.022536731795186088
          total_loss: 0.014284047626313709
          vf_explained_var: 0.9986748099327087
          vf_loss: 0.044935238616363636
    num_agent_steps_sampled: 357642
    num_agent_steps_trained: 357642
    num_steps_sampled: 357642
    num_steps_trained: 357642
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,179,4629.14,357642,20.7581,21.05,18.78,96.77


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 359640
  custom_metrics: {}
  date: 2021-11-13_22-47-06
  done: false
  episode_len_mean: 96.38
  episode_media: {}
  episode_reward_max: 21.089999999999957
  episode_reward_mean: 20.747999999999923
  episode_reward_min: 14.540000000000017
  episodes_this_iter: 21
  episodes_total: 3475
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.2526274221284048
          entropy_coeff: 0.009999999999999998
          kl: 0.012531667305806585
          policy_loss: -0.003810825127930868
          total_loss: 0.11581984205348861
          vf_explained_var: 0.9967610836029053
          vf_loss: 0.12581278620997355
    num_agent_steps_sampled: 359640
    num_agent_steps_trained: 359640
    num_steps_sampled: 359640
    num_steps_trained: 359640
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,180,4653.28,359640,20.748,21.09,14.54,96.38


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 361638
  custom_metrics: {}
  date: 2021-11-13_22-47-31
  done: false
  episode_len_mean: 95.09
  episode_media: {}
  episode_reward_max: 21.099999999999937
  episode_reward_mean: 20.697899999999933
  episode_reward_min: 14.540000000000017
  episodes_this_iter: 21
  episodes_total: 3496
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.251318898087456
          entropy_coeff: 0.009999999999999998
          kl: 0.012695651846603524
          policy_loss: -0.003236741874189604
          total_loss: 0.07454321945884398
          vf_explained_var: 0.9975591897964478
          vf_loss: 0.08386597605865626
    num_agent_steps_sampled: 361638
    num_agent_steps_trained: 361638
    num_steps_sampled: 361638
    num_steps_trained: 361638
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,181,4677.82,361638,20.6979,21.1,14.54,95.09


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 363636
  custom_metrics: {}
  date: 2021-11-13_22-47-55
  done: false
  episode_len_mean: 94.75
  episode_media: {}
  episode_reward_max: 21.099999999999937
  episode_reward_mean: 20.71399999999993
  episode_reward_min: 14.540000000000017
  episodes_this_iter: 21
  episodes_total: 3517
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.3489702968370347
          entropy_coeff: 0.009999999999999998
          kl: 0.009737272639233885
          policy_loss: 0.009661857624139104
          total_loss: 0.05614241819296564
          vf_explained_var: 0.9985034465789795
          vf_loss: 0.05504076865368656
    num_agent_steps_sampled: 363636
    num_agent_steps_trained: 363636
    num_steps_sampled: 363636
    num_steps_trained: 363636
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,182,4702.29,363636,20.714,21.1,14.54,94.75


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 365634
  custom_metrics: {}
  date: 2021-11-13_22-48-19
  done: false
  episode_len_mean: 93.99
  episode_media: {}
  episode_reward_max: 21.09999999999994
  episode_reward_mean: 20.714699999999933
  episode_reward_min: 14.540000000000017
  episodes_this_iter: 21
  episodes_total: 3538
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.290966256459554
          entropy_coeff: 0.009999999999999998
          kl: 0.009041866560473578
          policy_loss: -0.04106490686535835
          total_loss: -0.028260610261488527
          vf_explained_var: 0.9994357228279114
          vf_loss: 0.021136513791446175
    num_agent_steps_sampled: 365634
    num_agent_steps_trained: 365634
    num_steps_sampled: 365634
    num_steps_trained: 365634
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,183,4726.33,365634,20.7147,21.1,14.54,93.99




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 367632
  custom_metrics: {}
  date: 2021-11-13_22-48-58
  done: false
  episode_len_mean: 94.18
  episode_media: {}
  episode_reward_max: 21.09999999999994
  episode_reward_mean: 20.47839999999993
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 22
  episodes_total: 3560
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.2620480588504246
          entropy_coeff: 0.009999999999999998
          kl: 0.011093633881513739
          policy_loss: -0.0036645737077508653
          total_loss: 0.8059903232935106
          vf_explained_var: 0.9767251014709473
          vf_loss: 0.8166592191106506
    num_agent_steps_sampled: 367632
    num_agent_steps_trained: 367632
    num_steps_sampled: 367632
    num_steps_trained: 367632
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,184,4765.3,367632,20.4784,21.1,-0.06,94.18




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 369630
  custom_metrics: {}
  date: 2021-11-13_22-49-39
  done: false
  episode_len_mean: 93.85
  episode_media: {}
  episode_reward_max: 21.109999999999946
  episode_reward_mean: 20.561999999999934
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 3581
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.2766052876199996
          entropy_coeff: 0.009999999999999998
          kl: 0.009195606785842428
          policy_loss: -0.052305108007221
          total_loss: -0.017687808331989106
          vf_explained_var: 0.9987440705299377
          vf_loss: 0.04272807400806674
    num_agent_steps_sampled: 369630
    num_agent_steps_trained: 369630
    num_steps_sampled: 369630
    num_steps_trained: 369630
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,185,4806.45,369630,20.562,21.11,-0.06,93.85




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 371628
  custom_metrics: {}
  date: 2021-11-13_22-50-18
  done: false
  episode_len_mean: 93.36
  episode_media: {}
  episode_reward_max: 21.109999999999946
  episode_reward_mean: 20.414499999999933
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 22
  episodes_total: 3603
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.2566814553169978
          entropy_coeff: 0.009999999999999998
          kl: 0.007505065769859123
          policy_loss: -0.0013893210994345801
          total_loss: 0.5806777418369339
          vf_explained_var: 0.982631266117096
          vf_loss: 0.590834445550683
    num_agent_steps_sampled: 371628
    num_agent_steps_trained: 371628
    num_steps_sampled: 371628
    num_steps_trained: 371628
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,186,4844.63,371628,20.4145,21.11,-0.06,93.36


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 373626
  custom_metrics: {}
  date: 2021-11-13_22-50-42
  done: false
  episode_len_mean: 94.25
  episode_media: {}
  episode_reward_max: 21.109999999999946
  episode_reward_mean: 20.36489999999993
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 3624
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.3334094382467725
          entropy_coeff: 0.009999999999999998
          kl: 0.01120022980245501
          policy_loss: -0.0316352311166979
          total_loss: 0.010821834206581116
          vf_explained_var: 0.9987092614173889
          vf_loss: 0.05012104308587455
    num_agent_steps_sampled: 373626
    num_agent_steps_trained: 373626
    num_steps_sampled: 373626
    num_steps_trained: 373626
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,187,4868.82,373626,20.3649,21.11,-0.06,94.25


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 375624
  custom_metrics: {}
  date: 2021-11-13_22-51-06
  done: false
  episode_len_mean: 95.1
  episode_media: {}
  episode_reward_max: 21.109999999999946
  episode_reward_mean: 20.58109999999993
  episode_reward_min: 3.95
  episodes_this_iter: 21
  episodes_total: 3645
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.2385528467950366
          entropy_coeff: 0.009999999999999998
          kl: 0.015213636973150766
          policy_loss: -0.002867768376710869
          total_loss: 0.17454474124436578
          vf_explained_var: 0.9955179691314697
          vf_loss: 0.18209613077785997
    num_agent_steps_sampled: 375624
    num_agent_steps_trained: 375624
    num_steps_sampled: 375624
    num_steps_trained: 375624
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,188,4893,375624,20.5811,21.11,3.95,95.1


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 377622
  custom_metrics: {}
  date: 2021-11-13_22-51-30
  done: false
  episode_len_mean: 94.73
  episode_media: {}
  episode_reward_max: 21.109999999999946
  episode_reward_mean: 20.550299999999933
  episode_reward_min: 3.95
  episodes_this_iter: 21
  episodes_total: 3666
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.2298653880755106
          entropy_coeff: 0.009999999999999998
          kl: 0.007297763353627201
          policy_loss: 0.0015190112448873975
          total_loss: 0.05831919241519201
          vf_explained_var: 0.9981507062911987
          vf_loss: 0.06540434125456072
    num_agent_steps_sampled: 377622
    num_agent_steps_trained: 377622
    num_steps_sampled: 377622
    num_steps_trained: 377622
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,189,4917.14,377622,20.5503,21.11,3.95,94.73


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 379620
  custom_metrics: {}
  date: 2021-11-13_22-51-53
  done: false
  episode_len_mean: 95.57
  episode_media: {}
  episode_reward_max: 21.08999999999994
  episode_reward_mean: 20.495699999999932
  episode_reward_min: 3.95
  episodes_this_iter: 21
  episodes_total: 3687
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.2626732048534213
          entropy_coeff: 0.009999999999999998
          kl: 0.005239185883976442
          policy_loss: -0.11023431633199965
          total_loss: -0.07985838770511605
          vf_explained_var: 0.9989000558853149
          vf_loss: 0.04035032409065891
    num_agent_steps_sampled: 379620
    num_agent_steps_trained: 379620
    num_steps_sampled: 379620
    num_steps_trained: 379620
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,190,4940.18,379620,20.4957,21.09,3.95,95.57


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 381618
  custom_metrics: {}
  date: 2021-11-13_22-52-17
  done: false
  episode_len_mean: 96.06
  episode_media: {}
  episode_reward_max: 21.08999999999994
  episode_reward_mean: 20.584899999999926
  episode_reward_min: 11.180000000000012
  episodes_this_iter: 20
  episodes_total: 3707
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.2344542335896265
          entropy_coeff: 0.009999999999999998
          kl: 0.019354473223695556
          policy_loss: -0.008001967005076863
          total_loss: 0.11335815915039607
          vf_explained_var: 0.9962284564971924
          vf_loss: 0.12390647036511274
    num_agent_steps_sampled: 381618
    num_agent_steps_trained: 381618
    num_steps_sampled: 381618
    num_steps_trained: 381618
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,191,4964.05,381618,20.5849,21.09,11.18,96.06


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 383616
  custom_metrics: {}
  date: 2021-11-13_22-52-42
  done: false
  episode_len_mean: 95.48
  episode_media: {}
  episode_reward_max: 21.08999999999994
  episode_reward_mean: 20.619399999999928
  episode_reward_min: 11.180000000000012
  episodes_this_iter: 22
  episodes_total: 3729
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.2021193791003455
          entropy_coeff: 0.009999999999999998
          kl: 0.007548639189002064
          policy_loss: -0.008800987713038921
          total_loss: 0.03958701265177556
          vf_explained_var: 0.998400866985321
          vf_loss: 0.05658769528159783
    num_agent_steps_sampled: 383616
    num_agent_steps_trained: 383616
    num_steps_sampled: 383616
    num_steps_trained: 383616
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,192,4988.74,383616,20.6194,21.09,11.18,95.48


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 385614
  custom_metrics: {}
  date: 2021-11-13_22-53-06
  done: false
  episode_len_mean: 95.63
  episode_media: {}
  episode_reward_max: 21.099999999999948
  episode_reward_mean: 20.53679999999993
  episode_reward_min: 11.180000000000012
  episodes_this_iter: 20
  episodes_total: 3749
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.2293483172144208
          entropy_coeff: 0.009999999999999998
          kl: 0.014679668697506433
          policy_loss: 0.006424665876797267
          total_loss: 0.12197813710996083
          vf_explained_var: 0.9969422817230225
          vf_loss: 0.12041537388715716
    num_agent_steps_sampled: 385614
    num_agent_steps_trained: 385614
    num_steps_sampled: 385614
    num_steps_trained: 385614
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,193,5012.8,385614,20.5368,21.1,11.18,95.63


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 387612
  custom_metrics: {}
  date: 2021-11-13_22-53-31
  done: false
  episode_len_mean: 95.4
  episode_media: {}
  episode_reward_max: 21.119999999999948
  episode_reward_mean: 20.547099999999933
  episode_reward_min: 11.180000000000012
  episodes_this_iter: 21
  episodes_total: 3770
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.2448880638395037
          entropy_coeff: 0.009999999999999998
          kl: 0.006138226270090878
          policy_loss: 0.006634902111476376
          total_loss: 0.024114970490336417
          vf_explained_var: 0.9992703199386597
          vf_loss: 0.026821471336075948
    num_agent_steps_sampled: 387612
    num_agent_steps_trained: 387612
    num_steps_sampled: 387612
    num_steps_trained: 387612
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,194,5037.36,387612,20.5471,21.12,11.18,95.4


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 389610
  custom_metrics: {}
  date: 2021-11-13_22-53-54
  done: false
  episode_len_mean: 95.05
  episode_media: {}
  episode_reward_max: 21.119999999999948
  episode_reward_mean: 20.652799999999935
  episode_reward_min: 12.790000000000015
  episodes_this_iter: 22
  episodes_total: 3792
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.2233990305945988
          entropy_coeff: 0.009999999999999998
          kl: 0.009496242198408061
          policy_loss: -0.005507788984548478
          total_loss: 0.04041206325803484
          vf_explained_var: 0.9984774589538574
          vf_loss: 0.05334636791653576
    num_agent_steps_sampled: 389610
    num_agent_steps_trained: 389610
    num_steps_sampled: 389610
    num_steps_trained: 389610
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,195,5061.01,389610,20.6528,21.12,12.79,95.05


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 391608
  custom_metrics: {}
  date: 2021-11-13_22-54-19
  done: false
  episode_len_mean: 94.51
  episode_media: {}
  episode_reward_max: 21.119999999999948
  episode_reward_mean: 20.58019999999993
  episode_reward_min: 12.790000000000015
  episodes_this_iter: 21
  episodes_total: 3813
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.2396707250958396
          entropy_coeff: 0.009999999999999998
          kl: 0.012656572571900308
          policy_loss: 0.0020966202730224245
          total_loss: 0.11522797124371642
          vf_explained_var: 0.99701988697052
          vf_loss: 0.11912067103244009
    num_agent_steps_sampled: 391608
    num_agent_steps_trained: 391608
    num_steps_sampled: 391608
    num_steps_trained: 391608
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,196,5085.46,391608,20.5802,21.12,12.79,94.51


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 393606
  custom_metrics: {}
  date: 2021-11-13_22-54-43
  done: false
  episode_len_mean: 94.43
  episode_media: {}
  episode_reward_max: 21.119999999999948
  episode_reward_mean: 20.56909999999993
  episode_reward_min: 12.790000000000015
  episodes_this_iter: 21
  episodes_total: 3834
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.2546740117527189
          entropy_coeff: 0.009999999999999998
          kl: 0.006076281719002376
          policy_loss: 0.008064871352343333
          total_loss: 0.10189049967697689
          vf_explained_var: 0.9971746802330017
          vf_loss: 0.10329624481853984
    num_agent_steps_sampled: 393606
    num_agent_steps_trained: 393606
    num_steps_sampled: 393606
    num_steps_trained: 393606
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,197,5109.27,393606,20.5691,21.12,12.79,94.43


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 395604
  custom_metrics: {}
  date: 2021-11-13_22-55-07
  done: false
  episode_len_mean: 94.19
  episode_media: {}
  episode_reward_max: 21.119999999999948
  episode_reward_mean: 20.615999999999936
  episode_reward_min: 16.739999999999924
  episodes_this_iter: 21
  episodes_total: 3855
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.2694830627668472
          entropy_coeff: 0.009999999999999998
          kl: 0.006935922830859735
          policy_loss: -0.05411180935445286
          total_loss: -0.009467691236308643
          vf_explained_var: 0.9984966516494751
          vf_loss: 0.05382763885538138
    num_agent_steps_sampled: 395604
    num_agent_steps_trained: 395604
    num_steps_sampled: 395604
    num_steps_trained: 395604
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,198,5133.37,395604,20.616,21.12,16.74,94.19


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 397602
  custom_metrics: {}
  date: 2021-11-13_22-55-30
  done: false
  episode_len_mean: 94.85
  episode_media: {}
  episode_reward_max: 21.109999999999946
  episode_reward_mean: 20.612899999999932
  episode_reward_min: 16.739999999999924
  episodes_this_iter: 21
  episodes_total: 3876
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.2911335394496009
          entropy_coeff: 0.009999999999999998
          kl: 0.009472799024342175
          policy_loss: -0.01951264606877452
          total_loss: 0.02179146393956173
          vf_explained_var: 0.9984651803970337
          vf_loss: 0.04941983928549148
    num_agent_steps_sampled: 397602
    num_agent_steps_trained: 397602
    num_steps_sampled: 397602
    num_steps_trained: 397602
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,199,5157.04,397602,20.6129,21.11,16.74,94.85


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 399600
  custom_metrics: {}
  date: 2021-11-13_22-55-55
  done: false
  episode_len_mean: 95.47
  episode_media: {}
  episode_reward_max: 21.109999999999946
  episode_reward_mean: 20.60509999999993
  episode_reward_min: 16.739999999999924
  episodes_this_iter: 21
  episodes_total: 3897
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.2909392192250206
          entropy_coeff: 0.009999999999999998
          kl: 0.008634652440178496
          policy_loss: -0.04359191230365208
          total_loss: 0.06558879690156097
          vf_explained_var: 0.9967056512832642
          vf_loss: 0.11771881075664645
    num_agent_steps_sampled: 399600
    num_agent_steps_trained: 399600
    num_steps_sampled: 399600
    num_steps_trained: 399600
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,200,5181.5,399600,20.6051,21.11,16.74,95.47




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 401598
  custom_metrics: {}
  date: 2021-11-13_22-56-33
  done: false
  episode_len_mean: 94.97
  episode_media: {}
  episode_reward_max: 21.109999999999946
  episode_reward_mean: 20.41469999999993
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 22
  episodes_total: 3919
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.2814939936002097
          entropy_coeff: 0.009999999999999998
          kl: 0.007592132735880148
          policy_loss: -0.011625124585060846
          total_loss: 0.7841971338327441
          vf_explained_var: 0.9740095138549805
          vf_loss: 0.8047936881049758
    num_agent_steps_sampled: 401598
    num_agent_steps_trained: 401598
    num_steps_sampled: 401598
    num_steps_trained: 401598
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,201,5219.77,401598,20.4147,21.11,-0.06,94.97




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 403596
  custom_metrics: {}
  date: 2021-11-13_22-57-28
  done: false
  episode_len_mean: 93.25
  episode_media: {}
  episode_reward_max: 21.039999999999935
  episode_reward_mean: 20.070599999999928
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 23
  episodes_total: 3942
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.248496541522798
          entropy_coeff: 0.009999999999999998
          kl: 0.0026579919280527798
          policy_loss: 0.011007638727980001
          total_loss: 1.1432408090880406
          vf_explained_var: 0.964552640914917
          vf_loss: 1.1433725338606608
    num_agent_steps_sampled: 403596
    num_agent_steps_trained: 403596
    num_steps_sampled: 403596
    num_steps_trained: 403596
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,202,5274.29,403596,20.0706,21.04,-0.06,93.25


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 405594
  custom_metrics: {}
  date: 2021-11-13_22-57-53
  done: false
  episode_len_mean: 92.99
  episode_media: {}
  episode_reward_max: 21.079999999999977
  episode_reward_mean: 20.033699999999932
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 3963
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.278113271508898
          entropy_coeff: 0.009999999999999998
          kl: 0.006296365942425197
          policy_loss: -0.08171697333455086
          total_loss: -0.04421087469284733
          vf_explained_var: 0.9985867738723755
          vf_loss: 0.04869346393804465
    num_agent_steps_sampled: 405594
    num_agent_steps_trained: 405594
    num_steps_sampled: 405594
    num_steps_trained: 405594
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,203,5299.57,405594,20.0337,21.08,-0.06,92.99


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 407592
  custom_metrics: {}
  date: 2021-11-13_22-58-18
  done: false
  episode_len_mean: 92.18
  episode_media: {}
  episode_reward_max: 21.079999999999977
  episode_reward_mean: 20.038799999999934
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 3984
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.284692385083153
          entropy_coeff: 0.009999999999999998
          kl: 0.008259326619109601
          policy_loss: -0.015011522244839441
          total_loss: 0.007843636065011933
          vf_explained_var: 0.9990639686584473
          vf_loss: 0.033611438587485325
    num_agent_steps_sampled: 407592
    num_agent_steps_trained: 407592
    num_steps_sampled: 407592
    num_steps_trained: 407592
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,204,5324.56,407592,20.0388,21.08,-0.06,92.18


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 409590
  custom_metrics: {}
  date: 2021-11-13_22-58-42
  done: false
  episode_len_mean: 93.62
  episode_media: {}
  episode_reward_max: 21.079999999999977
  episode_reward_mean: 20.29149999999993
  episode_reward_min: -0.05
  episodes_this_iter: 20
  episodes_total: 4004
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.306741243884677
          entropy_coeff: 0.009999999999999998
          kl: 0.014053190039910238
          policy_loss: -0.0018182567542507535
          total_loss: 0.0257154714848314
          vf_explained_var: 0.9989262223243713
          vf_loss: 0.03704392832393447
    num_agent_steps_sampled: 409590
    num_agent_steps_trained: 409590
    num_steps_sampled: 409590
    num_steps_trained: 409590
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,205,5347.92,409590,20.2915,21.08,-0.05,93.62


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 411588
  custom_metrics: {}
  date: 2021-11-13_22-59-05
  done: false
  episode_len_mean: 93.49
  episode_media: {}
  episode_reward_max: 21.079999999999977
  episode_reward_mean: 20.30729999999993
  episode_reward_min: -0.05
  episodes_this_iter: 22
  episodes_total: 4026
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.252884506611597
          entropy_coeff: 0.009999999999999998
          kl: 0.012750960386963734
          policy_loss: 0.0010858001719628062
          total_loss: 0.06429730245089602
          vf_explained_var: 0.9979372024536133
          vf_loss: 0.07251276086040195
    num_agent_steps_sampled: 411588
    num_agent_steps_trained: 411588
    num_steps_sampled: 411588
    num_steps_trained: 411588
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,206,5371.79,411588,20.3073,21.08,-0.05,93.49


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 413586
  custom_metrics: {}
  date: 2021-11-13_22-59-30
  done: false
  episode_len_mean: 95.56
  episode_media: {}
  episode_reward_max: 21.079999999999977
  episode_reward_mean: 20.67759999999993
  episode_reward_min: 17.16000000000001
  episodes_this_iter: 19
  episodes_total: 4045
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2852584112258185
          entropy_coeff: 0.009999999999999998
          kl: 0.00939681177043456
          policy_loss: -0.008620868765172504
          total_loss: 0.02080300815758251
          vf_explained_var: 0.9989240169525146
          vf_loss: 0.03989789232700353
    num_agent_steps_sampled: 413586
    num_agent_steps_trained: 413586
    num_steps_sampled: 413586
    num_steps_trained: 413586
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,207,5395.94,413586,20.6776,21.08,17.16,95.56


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 415584
  custom_metrics: {}
  date: 2021-11-13_22-59-53
  done: false
  episode_len_mean: 96.53
  episode_media: {}
  episode_reward_max: 21.05999999999994
  episode_reward_mean: 20.724399999999925
  episode_reward_min: 18.319999999999908
  episodes_this_iter: 20
  episodes_total: 4065
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2033189308075678
          entropy_coeff: 0.009999999999999998
          kl: 0.014727071852304255
          policy_loss: 0.001399781537197885
          total_loss: 0.10306764138596398
          vf_explained_var: 0.9965757131576538
          vf_loss: 0.10997325941210702
    num_agent_steps_sampled: 415584
    num_agent_steps_trained: 415584
    num_steps_sampled: 415584
    num_steps_trained: 415584
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,208,5418.94,415584,20.7244,21.06,18.32,96.53


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 417582
  custom_metrics: {}
  date: 2021-11-13_23-00-16
  done: false
  episode_len_mean: 97.39
  episode_media: {}
  episode_reward_max: 21.05999999999994
  episode_reward_mean: 20.678199999999926
  episode_reward_min: 16.839999999999947
  episodes_this_iter: 21
  episodes_total: 4086
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2769645004045396
          entropy_coeff: 0.009999999999999998
          kl: 0.010768378282759365
          policy_loss: -0.011039703534472556
          total_loss: 0.025971368469652676
          vf_explained_var: 0.9985650777816772
          vf_loss: 0.04705497245969517
    num_agent_steps_sampled: 417582
    num_agent_steps_trained: 417582
    num_steps_sampled: 417582
    num_steps_trained: 417582
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,209,5442.21,417582,20.6782,21.06,16.84,97.39


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 419580
  custom_metrics: {}
  date: 2021-11-13_23-00-40
  done: false
  episode_len_mean: 97.37
  episode_media: {}
  episode_reward_max: 21.05999999999994
  episode_reward_mean: 20.647599999999926
  episode_reward_min: 16.70999999999995
  episodes_this_iter: 21
  episodes_total: 4107
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3002895922887894
          entropy_coeff: 0.009999999999999998
          kl: 0.01679548035699946
          policy_loss: -0.010485743518386569
          total_loss: 0.08628491730917068
          vf_explained_var: 0.9969220757484436
          vf_loss: 0.10552219813246103
    num_agent_steps_sampled: 419580
    num_agent_steps_trained: 419580
    num_steps_sampled: 419580
    num_steps_trained: 419580
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,210,5466.18,419580,20.6476,21.06,16.71,97.37


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 421578
  custom_metrics: {}
  date: 2021-11-13_23-01-04
  done: false
  episode_len_mean: 97.57
  episode_media: {}
  episode_reward_max: 21.049999999999937
  episode_reward_mean: 20.47269999999993
  episode_reward_min: 10.740000000000016
  episodes_this_iter: 20
  episodes_total: 4127
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3049848238627115
          entropy_coeff: 0.009999999999999998
          kl: 0.019989684120448367
          policy_loss: -0.017935525342112495
          total_loss: 0.4086260495529998
          vf_explained_var: 0.9849757552146912
          vf_loss: 0.4345515325221987
    num_agent_steps_sampled: 421578
    num_agent_steps_trained: 421578
    num_steps_sampled: 421578
    num_steps_trained: 421578
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,211,5489.63,421578,20.4727,21.05,10.74,97.57


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 423576
  custom_metrics: {}
  date: 2021-11-13_23-01-26
  done: false
  episode_len_mean: 97.88
  episode_media: {}
  episode_reward_max: 21.049999999999937
  episode_reward_mean: 20.517699999999927
  episode_reward_min: 10.740000000000016
  episodes_this_iter: 21
  episodes_total: 4148
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2481354622613816
          entropy_coeff: 0.009999999999999998
          kl: 0.011114490701591616
          policy_loss: -0.029461252370050976
          total_loss: -0.0007699346258526757
          vf_explained_var: 0.9989082217216492
          vf_loss: 0.038359318753438336
    num_agent_steps_sampled: 423576
    num_agent_steps_trained: 423576
    num_steps_sampled: 423576
    num_steps_trained: 423576
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,212,5512.51,423576,20.5177,21.05,10.74,97.88


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 425574
  custom_metrics: {}
  date: 2021-11-13_23-01-51
  done: false
  episode_len_mean: 97.32
  episode_media: {}
  episode_reward_max: 21.129999999999963
  episode_reward_mean: 20.486799999999924
  episode_reward_min: 10.740000000000016
  episodes_this_iter: 21
  episodes_total: 4169
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2319930757795061
          entropy_coeff: 0.009999999999999998
          kl: 0.009269710960033734
          policy_loss: -0.06782007566874936
          total_loss: -0.009401091529677311
          vf_explained_var: 0.9979977011680603
          vf_loss: 0.06839252097443456
    num_agent_steps_sampled: 425574
    num_agent_steps_trained: 425574
    num_steps_sampled: 425574
    num_steps_trained: 425574
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,213,5537.52,425574,20.4868,21.13,10.74,97.32


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 427572
  custom_metrics: {}
  date: 2021-11-13_23-02-16
  done: false
  episode_len_mean: 96.38
  episode_media: {}
  episode_reward_max: 21.129999999999963
  episode_reward_mean: 20.471299999999932
  episode_reward_min: 10.740000000000016
  episodes_this_iter: 21
  episodes_total: 4190
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.1966089770907447
          entropy_coeff: 0.009999999999999998
          kl: 0.018160965999105702
          policy_loss: -0.004924264408293224
          total_loss: 0.07283722591542062
          vf_explained_var: 0.9976999759674072
          vf_loss: 0.0851305842266551
    num_agent_steps_sampled: 427572
    num_agent_steps_trained: 427572
    num_steps_sampled: 427572
    num_steps_trained: 427572
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,214,5561.65,427572,20.4713,21.13,10.74,96.38


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 429570
  custom_metrics: {}
  date: 2021-11-13_23-02-40
  done: false
  episode_len_mean: 96.08
  episode_media: {}
  episode_reward_max: 21.129999999999963
  episode_reward_mean: 20.603999999999928
  episode_reward_min: 10.740000000000016
  episodes_this_iter: 21
  episodes_total: 4211
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2874976776895068
          entropy_coeff: 0.009999999999999998
          kl: 0.009057510788622719
          policy_loss: -0.011562677579266684
          total_loss: 0.0001419966242143086
          vf_explained_var: 0.9993030428886414
          vf_loss: 0.022286967720304217
    num_agent_steps_sampled: 429570
    num_agent_steps_trained: 429570
    num_steps_sampled: 429570
    num_steps_trained: 429570
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,215,5585.91,429570,20.604,21.13,10.74,96.08


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 431568
  custom_metrics: {}
  date: 2021-11-13_23-03-04
  done: false
  episode_len_mean: 96.08
  episode_media: {}
  episode_reward_max: 21.129999999999963
  episode_reward_mean: 20.746499999999926
  episode_reward_min: 16.559999999999953
  episodes_this_iter: 21
  episodes_total: 4232
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3371946437018258
          entropy_coeff: 0.009999999999999998
          kl: 0.008261226669106123
          policy_loss: -0.029888963069589367
          total_loss: -0.02291886315991481
          vf_explained_var: 0.9994333386421204
          vf_loss: 0.018250921925175047
    num_agent_steps_sampled: 431568
    num_agent_steps_trained: 431568
    num_steps_sampled: 431568
    num_steps_trained: 431568
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,216,5610.41,431568,20.7465,21.13,16.56,96.08


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 433566
  custom_metrics: {}
  date: 2021-11-13_23-03-28
  done: false
  episode_len_mean: 95.61
  episode_media: {}
  episode_reward_max: 21.129999999999963
  episode_reward_mean: 20.78979999999993
  episode_reward_min: 16.599999999999977
  episodes_this_iter: 20
  episodes_total: 4252
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2859829857235863
          entropy_coeff: 0.009999999999999998
          kl: 0.015060631152203963
          policy_loss: -0.022999151122002376
          total_loss: 0.00030836258970555805
          vf_explained_var: 0.99921715259552
          vf_loss: 0.032355120004199094
    num_agent_steps_sampled: 433566
    num_agent_steps_trained: 433566
    num_steps_sampled: 433566
    num_steps_trained: 433566
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,217,5634.33,433566,20.7898,21.13,16.6,95.61




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 435564
  custom_metrics: {}
  date: 2021-11-13_23-04-08
  done: false
  episode_len_mean: 96.67
  episode_media: {}
  episode_reward_max: 21.049999999999933
  episode_reward_mean: 20.790599999999927
  episode_reward_min: 16.959999999999933
  episodes_this_iter: 19
  episodes_total: 4271
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3088393177304949
          entropy_coeff: 0.009999999999999998
          kl: 0.00915684656222889
          policy_loss: -0.08540358719016825
          total_loss: -0.0773405751834313
          vf_explained_var: 0.9994468092918396
          vf_loss: 0.01883357836687494
    num_agent_steps_sampled: 435564
    num_agent_steps_trained: 435564
    num_steps_sampled: 435564
    num_steps_trained: 435564
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,218,5674.02,435564,20.7906,21.05,16.96,96.67




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 437562
  custom_metrics: {}
  date: 2021-11-13_23-04-49
  done: false
  episode_len_mean: 96.65
  episode_media: {}
  episode_reward_max: 21.049999999999933
  episode_reward_mean: 20.612899999999925
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 22
  episodes_total: 4293
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2058549097606115
          entropy_coeff: 0.009999999999999998
          kl: 0.01239050014019572
          policy_loss: 0.004866285342723131
          total_loss: 0.8401114689008821
          vf_explained_var: 0.9777026772499084
          vf_loss: 0.8441673815959976
    num_agent_steps_sampled: 437562
    num_agent_steps_trained: 437562
    num_steps_sampled: 437562
    num_steps_trained: 437562
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,219,5714.94,437562,20.6129,21.05,-0.06,96.65




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 439560
  custom_metrics: {}
  date: 2021-11-13_23-05-26
  done: false
  episode_len_mean: 96.84
  episode_media: {}
  episode_reward_max: 21.049999999999933
  episode_reward_mean: 20.378599999999928
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 4312
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.4140177283968245
          entropy_coeff: 0.009999999999999998
          kl: 0.008696141384880885
          policy_loss: -0.04563971232800257
          total_loss: 0.11603626949446542
          vf_explained_var: 0.9949465990066528
          vf_loss: 0.1736149471918387
    num_agent_steps_sampled: 439560
    num_agent_steps_trained: 439560
    num_steps_sampled: 439560
    num_steps_trained: 439560
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,220,5752.18,439560,20.3786,21.05,-0.06,96.84


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 441558
  custom_metrics: {}
  date: 2021-11-13_23-05-50
  done: false
  episode_len_mean: 98.23
  episode_media: {}
  episode_reward_max: 21.029999999999944
  episode_reward_mean: 20.321299999999923
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 21
  episodes_total: 4333
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3316350374902999
          entropy_coeff: 0.009999999999999998
          kl: 0.012336262417109702
          policy_loss: -0.010524643593955607
          total_loss: 0.022357244412636472
          vf_explained_var: 0.9987398982048035
          vf_loss: 0.043075622937508995
    num_agent_steps_sampled: 441558
    num_agent_steps_trained: 441558
    num_steps_sampled: 441558
    num_steps_trained: 441558
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,221,5775.9,441558,20.3213,21.03,-0.06,98.23


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 443556
  custom_metrics: {}
  date: 2021-11-13_23-06-13
  done: false
  episode_len_mean: 99.44
  episode_media: {}
  episode_reward_max: 21.05999999999994
  episode_reward_mean: 20.32549999999992
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 4353
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3434124100775946
          entropy_coeff: 0.009999999999999998
          kl: 0.009299920823413872
          policy_loss: -0.006772984311516796
          total_loss: 0.007073263744158404
          vf_explained_var: 0.9992102980613708
          vf_loss: 0.024926327517078746
    num_agent_steps_sampled: 443556
    num_agent_steps_trained: 443556
    num_steps_sampled: 443556
    num_steps_trained: 443556
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,222,5798.54,443556,20.3255,21.06,-0.06,99.44


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 445554
  custom_metrics: {}
  date: 2021-11-13_23-06-36
  done: false
  episode_len_mean: 99.67
  episode_media: {}
  episode_reward_max: 21.05999999999994
  episode_reward_mean: 20.332499999999918
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 19
  episodes_total: 4372
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.315574053923289
          entropy_coeff: 0.009999999999999998
          kl: 0.014020193717212465
          policy_loss: -0.011960559759643816
          total_loss: 0.0080210261401676
          vf_explained_var: 0.9990760684013367
          vf_loss: 0.029588462769364318
    num_agent_steps_sampled: 445554
    num_agent_steps_trained: 445554
    num_steps_sampled: 445554
    num_steps_trained: 445554
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,223,5821.34,445554,20.3325,21.06,-0.06,99.67


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 447552
  custom_metrics: {}
  date: 2021-11-13_23-06-58
  done: false
  episode_len_mean: 101.16
  episode_media: {}
  episode_reward_max: 21.05999999999994
  episode_reward_mean: 20.531999999999922
  episode_reward_min: -0.01
  episodes_this_iter: 19
  episodes_total: 4391
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3056091694604783
          entropy_coeff: 0.009999999999999998
          kl: 0.017426840072202892
          policy_loss: -0.0074750275927640145
          total_loss: 0.018008525186173973
          vf_explained_var: 0.999004065990448
          vf_loss: 0.034128477384469336
    num_agent_steps_sampled: 447552
    num_agent_steps_trained: 447552
    num_steps_sampled: 447552
    num_steps_trained: 447552
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,224,5844.09,447552,20.532,21.06,-0.01,101.16


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 449550
  custom_metrics: {}
  date: 2021-11-13_23-07-21
  done: false
  episode_len_mean: 101.27
  episode_media: {}
  episode_reward_max: 21.05999999999994
  episode_reward_mean: 20.68749999999992
  episode_reward_min: 18.649999999999903
  episodes_this_iter: 21
  episodes_total: 4412
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2731538925852095
          entropy_coeff: 0.009999999999999998
          kl: 0.01507803987971334
          policy_loss: 0.01847659959679558
          total_loss: 0.06735974454454013
          vf_explained_var: 0.9983228445053101
          vf_loss: 0.05779805531547893
    num_agent_steps_sampled: 449550
    num_agent_steps_trained: 449550
    num_steps_sampled: 449550
    num_steps_trained: 449550
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,225,5867.09,449550,20.6875,21.06,18.65,101.27


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 451548
  custom_metrics: {}
  date: 2021-11-13_23-07-44
  done: false
  episode_len_mean: 101.82
  episode_media: {}
  episode_reward_max: 21.05999999999994
  episode_reward_mean: 20.68769999999992
  episode_reward_min: 18.649999999999903
  episodes_this_iter: 19
  episodes_total: 4431
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.357738295055571
          entropy_coeff: 0.009999999999999998
          kl: 0.008421629734432725
          policy_loss: -0.08334778485198815
          total_loss: -0.05183669321593784
          vf_explained_var: 0.9988490343093872
          vf_loss: 0.04295674853159913
    num_agent_steps_sampled: 451548
    num_agent_steps_trained: 451548
    num_steps_sampled: 451548
    num_steps_trained: 451548
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,226,5889.16,451548,20.6877,21.06,18.65,101.82


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 453546
  custom_metrics: {}
  date: 2021-11-13_23-08-07
  done: false
  episode_len_mean: 101.06
  episode_media: {}
  episode_reward_max: 21.05999999999994
  episode_reward_mean: 20.65289999999992
  episode_reward_min: 18.599999999999923
  episodes_this_iter: 20
  episodes_total: 4451
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2417373475574311
          entropy_coeff: 0.009999999999999998
          kl: 0.014841322641662777
          policy_loss: -0.020773185647669293
          total_loss: 0.0276576511207081
          vf_explained_var: 0.9983596801757812
          vf_loss: 0.0570915018341371
    num_agent_steps_sampled: 453546
    num_agent_steps_trained: 453546
    num_steps_sampled: 453546
    num_steps_trained: 453546
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,227,5912.18,453546,20.6529,21.06,18.6,101.06


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 455544
  custom_metrics: {}
  date: 2021-11-13_23-08-31
  done: false
  episode_len_mean: 99.98
  episode_media: {}
  episode_reward_max: 21.089999999999943
  episode_reward_mean: 20.671299999999924
  episode_reward_min: 18.599999999999923
  episodes_this_iter: 21
  episodes_total: 4472
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2973827038492476
          entropy_coeff: 0.009999999999999998
          kl: 0.01020211369499305
          policy_loss: -0.02887563985728082
          total_loss: -0.01799786411048401
          vf_explained_var: 0.9993236660957336
          vf_loss: 0.021269193559973722
    num_agent_steps_sampled: 455544
    num_agent_steps_trained: 455544
    num_steps_sampled: 455544
    num_steps_trained: 455544
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,228,5936.46,455544,20.6713,21.09,18.6,99.98


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 457542
  custom_metrics: {}
  date: 2021-11-13_23-08-54
  done: false
  episode_len_mean: 99.78
  episode_media: {}
  episode_reward_max: 21.089999999999943
  episode_reward_mean: 20.68219999999992
  episode_reward_min: 18.589999999999904
  episodes_this_iter: 19
  episodes_total: 4491
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2606905676069713
          entropy_coeff: 0.009999999999999998
          kl: 0.009887704825890279
          policy_loss: 0.004901735555557977
          total_loss: 0.01757663797054972
          vf_explained_var: 0.9993243217468262
          vf_loss: 0.022778983125906614
    num_agent_steps_sampled: 457542
    num_agent_steps_trained: 457542
    num_steps_sampled: 457542
    num_steps_trained: 457542
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,229,5959.85,457542,20.6822,21.09,18.59,99.78


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 459540
  custom_metrics: {}
  date: 2021-11-13_23-09-17
  done: false
  episode_len_mean: 99.55
  episode_media: {}
  episode_reward_max: 21.089999999999943
  episode_reward_mean: 20.73489999999993
  episode_reward_min: 18.589999999999904
  episodes_this_iter: 21
  episodes_total: 4512
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.260521541890644
          entropy_coeff: 0.009999999999999998
          kl: 0.01166047707321839
          policy_loss: -0.020866529093611808
          total_loss: 0.01200519407256728
          vf_explained_var: 0.9987995028495789
          vf_loss: 0.04252538402742218
    num_agent_steps_sampled: 459540
    num_agent_steps_trained: 459540
    num_steps_sampled: 459540
    num_steps_trained: 459540
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,230,5983.03,459540,20.7349,21.09,18.59,99.55


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 461538
  custom_metrics: {}
  date: 2021-11-13_23-09-41
  done: false
  episode_len_mean: 98.44
  episode_media: {}
  episode_reward_max: 21.089999999999943
  episode_reward_mean: 20.756599999999924
  episode_reward_min: 18.589999999999904
  episodes_this_iter: 21
  episodes_total: 4533
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2493065658069793
          entropy_coeff: 0.009999999999999998
          kl: 0.011435396003738859
          policy_loss: -0.005194380702007385
          total_loss: 0.018855198552565917
          vf_explained_var: 0.9990575909614563
          vf_loss: 0.03364806012250483
    num_agent_steps_sampled: 461538
    num_agent_steps_trained: 461538
    num_steps_sampled: 461538
    num_steps_trained: 461538
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,231,6006.53,461538,20.7566,21.09,18.59,98.44


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 463536
  custom_metrics: {}
  date: 2021-11-13_23-10-04
  done: false
  episode_len_mean: 98.26
  episode_media: {}
  episode_reward_max: 21.089999999999943
  episode_reward_mean: 20.770999999999926
  episode_reward_min: 18.589999999999904
  episodes_this_iter: 20
  episodes_total: 4553
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.1820445849781944
          entropy_coeff: 0.009999999999999998
          kl: 0.014290345770257638
          policy_loss: -0.0015040719970351173
          total_loss: 0.042283247358032634
          vf_explained_var: 0.99819016456604
          vf_loss: 0.051990522328941594
    num_agent_steps_sampled: 463536
    num_agent_steps_trained: 463536
    num_steps_sampled: 463536
    num_steps_trained: 463536
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,232,6029.54,463536,20.771,21.09,18.59,98.26


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 465534
  custom_metrics: {}
  date: 2021-11-13_23-10-28
  done: false
  episode_len_mean: 98.11
  episode_media: {}
  episode_reward_max: 21.049999999999937
  episode_reward_mean: 20.758599999999923
  episode_reward_min: 18.589999999999904
  episodes_this_iter: 21
  episodes_total: 4574
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.1965032654149192
          entropy_coeff: 0.009999999999999998
          kl: 0.01231672811130073
          policy_loss: -0.013104308325619925
          total_loss: 0.00706722837473665
          vf_explained_var: 0.9991607666015625
          vf_loss: 0.029018898077663922
    num_agent_steps_sampled: 465534
    num_agent_steps_trained: 465534
    num_steps_sampled: 465534
    num_steps_trained: 465534
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,233,6053.48,465534,20.7586,21.05,18.59,98.11


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 467532
  custom_metrics: {}
  date: 2021-11-13_23-10-51
  done: false
  episode_len_mean: 97.22
  episode_media: {}
  episode_reward_max: 21.049999999999937
  episode_reward_mean: 20.795099999999923
  episode_reward_min: 18.709999999999944
  episodes_this_iter: 21
  episodes_total: 4595
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.1747800852571215
          entropy_coeff: 0.009999999999999998
          kl: 0.0142555414718198
          policy_loss: -0.016909473424866085
          total_loss: -0.018763662998874983
          vf_explained_var: 0.9998218417167664
          vf_loss: 0.006285176912899174
    num_agent_steps_sampled: 467532
    num_agent_steps_trained: 467532
    num_steps_sampled: 467532
    num_steps_trained: 467532
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,234,6076.64,467532,20.7951,21.05,18.71,97.22




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 469530
  custom_metrics: {}
  date: 2021-11-13_23-11-32
  done: false
  episode_len_mean: 95.65
  episode_media: {}
  episode_reward_max: 21.049999999999937
  episode_reward_mean: 20.658299999999926
  episode_reward_min: 1.94
  episodes_this_iter: 22
  episodes_total: 4617
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2230967328661964
          entropy_coeff: 0.009999999999999998
          kl: 0.012966444451669046
          policy_loss: 0.0005167641898705846
          total_loss: 0.7318451044194045
          vf_explained_var: 0.9782633185386658
          vf_loss: 0.7402771619946829
    num_agent_steps_sampled: 469530
    num_agent_steps_trained: 469530
    num_steps_sampled: 469530
    num_steps_trained: 469530
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,235,6116.97,469530,20.6583,21.05,1.94,95.65




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 471528
  custom_metrics: {}
  date: 2021-11-13_23-12-14
  done: false
  episode_len_mean: 93.92
  episode_media: {}
  episode_reward_max: 21.049999999999937
  episode_reward_mean: 20.344399999999936
  episode_reward_min: 1.94
  episodes_this_iter: 22
  episodes_total: 4639
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.200727291334243
          entropy_coeff: 0.009999999999999998
          kl: 0.019206888404533357
          policy_loss: -0.028390448895238694
          total_loss: 0.1716547325785671
          vf_explained_var: 0.9939673542976379
          vf_loss: 0.2071907106254782
    num_agent_steps_sampled: 471528
    num_agent_steps_trained: 471528
    num_steps_sampled: 471528
    num_steps_trained: 471528
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,236,6159.08,471528,20.3444,21.05,1.94,93.92




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 473526
  custom_metrics: {}
  date: 2021-11-13_23-12-54
  done: false
  episode_len_mean: 93.38
  episode_media: {}
  episode_reward_max: 21.069999999999936
  episode_reward_mean: 20.316899999999933
  episode_reward_min: 1.94
  episodes_this_iter: 21
  episodes_total: 4660
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.1991205780279068
          entropy_coeff: 0.009999999999999998
          kl: 0.011346941912291492
          policy_loss: 0.005604872533253261
          total_loss: 0.06277174981577056
          vf_explained_var: 0.9982219934463501
          vf_loss: 0.06628589099716573
    num_agent_steps_sampled: 473526
    num_agent_steps_trained: 473526
    num_steps_sampled: 473526
    num_steps_trained: 473526
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,237,6199.37,473526,20.3169,21.07,1.94,93.38


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 475524
  custom_metrics: {}
  date: 2021-11-13_23-13-17
  done: false
  episode_len_mean: 93.94
  episode_media: {}
  episode_reward_max: 21.069999999999936
  episode_reward_mean: 20.28609999999993
  episode_reward_min: 1.94
  episodes_this_iter: 20
  episodes_total: 4680
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3061660630362375
          entropy_coeff: 0.009999999999999998
          kl: 0.00964103385457014
          policy_loss: -0.049480897436539335
          total_loss: -0.01254508596445833
          vf_explained_var: 0.9985045790672302
          vf_loss: 0.04755708406280194
    num_agent_steps_sampled: 475524
    num_agent_steps_trained: 475524
    num_steps_sampled: 475524
    num_steps_trained: 475524
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,238,6222.71,475524,20.2861,21.07,1.94,93.94


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 477522
  custom_metrics: {}
  date: 2021-11-13_23-13-41
  done: false
  episode_len_mean: 94.68
  episode_media: {}
  episode_reward_max: 21.069999999999936
  episode_reward_mean: 20.19189999999993
  episode_reward_min: 1.94
  episodes_this_iter: 21
  episodes_total: 4701
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.344293216864268
          entropy_coeff: 0.009999999999999998
          kl: 0.011474865216542788
          policy_loss: -0.00036382703554062616
          total_loss: 0.056756323238923434
          vf_explained_var: 0.9982113838195801
          vf_loss: 0.06765850718088803
    num_agent_steps_sampled: 477522
    num_agent_steps_trained: 477522
    num_steps_sampled: 477522
    num_steps_trained: 477522
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,239,6246.01,477522,20.1919,21.07,1.94,94.68


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 479520
  custom_metrics: {}
  date: 2021-11-13_23-14-03
  done: false
  episode_len_mean: 96.26
  episode_media: {}
  episode_reward_max: 21.069999999999936
  episode_reward_mean: 20.453699999999927
  episode_reward_min: 1.95
  episodes_this_iter: 19
  episodes_total: 4720
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.279663756347838
          entropy_coeff: 0.009999999999999998
          kl: 0.014753006620893815
          policy_loss: -0.0227176234126091
          total_loss: -0.011668672341675986
          vf_explained_var: 0.9993982315063477
          vf_loss: 0.020111231862877808
    num_agent_steps_sampled: 479520
    num_agent_steps_trained: 479520
    num_steps_sampled: 479520
    num_steps_trained: 479520
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,240,6268.55,479520,20.4537,21.07,1.95,96.26


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 481518
  custom_metrics: {}
  date: 2021-11-13_23-14-27
  done: false
  episode_len_mean: 98.46
  episode_media: {}
  episode_reward_max: 21.069999999999936
  episode_reward_mean: 20.708999999999925
  episode_reward_min: 16.579999999999927
  episodes_this_iter: 21
  episodes_total: 4741
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3609485211826506
          entropy_coeff: 0.009999999999999998
          kl: 0.011390095363792712
          policy_loss: 0.0003040900720017297
          total_loss: 0.026849073295791944
          vf_explained_var: 0.9989697933197021
          vf_loss: 0.03727135203246559
    num_agent_steps_sampled: 481518
    num_agent_steps_trained: 481518
    num_steps_sampled: 481518
    num_steps_trained: 481518
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,241,6291.71,481518,20.709,21.07,16.58,98.46


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 483516
  custom_metrics: {}
  date: 2021-11-13_23-14-48
  done: false
  episode_len_mean: 99.97
  episode_media: {}
  episode_reward_max: 21.029999999999937
  episode_reward_mean: 20.731199999999923
  episode_reward_min: 16.579999999999927
  episodes_this_iter: 18
  episodes_total: 4759
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.323291037763868
          entropy_coeff: 0.009999999999999998
          kl: 0.010701032161935247
          policy_loss: -0.013166098970742453
          total_loss: 0.013649186154916173
          vf_explained_var: 0.9988694190979004
          vf_loss: 0.0373394992690356
    num_agent_steps_sampled: 483516
    num_agent_steps_trained: 483516
    num_steps_sampled: 483516
    num_steps_trained: 483516
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,242,6313.56,483516,20.7312,21.03,16.58,99.97


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 485514
  custom_metrics: {}
  date: 2021-11-13_23-15-11
  done: false
  episode_len_mean: 99.87
  episode_media: {}
  episode_reward_max: 21.039999999999935
  episode_reward_mean: 20.723399999999923
  episode_reward_min: 16.579999999999927
  episodes_this_iter: 21
  episodes_total: 4780
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3225820246196929
          entropy_coeff: 0.009999999999999998
          kl: 0.007957456875727721
          policy_loss: -0.07840678571235565
          total_loss: -0.05254419680152621
          vf_explained_var: 0.9990901350975037
          vf_loss: 0.03707417724210592
    num_agent_steps_sampled: 485514
    num_agent_steps_trained: 485514
    num_steps_sampled: 485514
    num_steps_trained: 485514
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,243,6336.1,485514,20.7234,21.04,16.58,99.87


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 487512
  custom_metrics: {}
  date: 2021-11-13_23-15-34
  done: false
  episode_len_mean: 100.16
  episode_media: {}
  episode_reward_max: 21.039999999999935
  episode_reward_mean: 20.763999999999918
  episode_reward_min: 18.459999999999887
  episodes_this_iter: 20
  episodes_total: 4800
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3749241051219758
          entropy_coeff: 0.009999999999999998
          kl: 0.009579684198102761
          policy_loss: -0.02476291214781148
          total_loss: -0.012389172400747026
          vf_explained_var: 0.9993321299552917
          vf_loss: 0.02369812183481242
    num_agent_steps_sampled: 487512
    num_agent_steps_trained: 487512
    num_steps_sampled: 487512
    num_steps_trained: 487512
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,244,6359.03,487512,20.764,21.04,18.46,100.16


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 489510
  custom_metrics: {}
  date: 2021-11-13_23-15-57
  done: false
  episode_len_mean: 100.2
  episode_media: {}
  episode_reward_max: 21.049999999999937
  episode_reward_mean: 20.72179999999992
  episode_reward_min: 18.459999999999887
  episodes_this_iter: 19
  episodes_total: 4819
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3122726832117353
          entropy_coeff: 0.009999999999999998
          kl: 0.014186951522014666
          policy_loss: -0.0033743329700969514
          total_loss: 0.025934548622795515
          vf_explained_var: 0.9988930821418762
          vf_loss: 0.038840535518136765
    num_agent_steps_sampled: 489510
    num_agent_steps_trained: 489510
    num_steps_sampled: 489510
    num_steps_trained: 489510
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,245,6381.86,489510,20.7218,21.05,18.46,100.2


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 491508
  custom_metrics: {}
  date: 2021-11-13_23-16-19
  done: false
  episode_len_mean: 99.92
  episode_media: {}
  episode_reward_max: 21.049999999999937
  episode_reward_mean: 20.746499999999916
  episode_reward_min: 18.459999999999887
  episodes_this_iter: 21
  episodes_total: 4840
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2901629090309144
          entropy_coeff: 0.009999999999999998
          kl: 0.012144193604069913
          policy_loss: -0.011719097662717105
          total_loss: 0.007935264888441278
          vf_explained_var: 0.9991973638534546
          vf_loss: 0.029481991662067317
    num_agent_steps_sampled: 491508
    num_agent_steps_trained: 491508
    num_steps_sampled: 491508
    num_steps_trained: 491508
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,246,6403.9,491508,20.7465,21.05,18.46,99.92


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 493506
  custom_metrics: {}
  date: 2021-11-13_23-16-42
  done: false
  episode_len_mean: 98.73
  episode_media: {}
  episode_reward_max: 21.049999999999937
  episode_reward_mean: 20.782199999999925
  episode_reward_min: 18.459999999999887
  episodes_this_iter: 21
  episodes_total: 4861
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.284473244916825
          entropy_coeff: 0.009999999999999998
          kl: 0.012617092351927692
          policy_loss: -0.01274563122008528
          total_loss: -0.015371828756871677
          vf_explained_var: 0.9997955560684204
          vf_loss: 0.007024831731715018
    num_agent_steps_sampled: 493506
    num_agent_steps_trained: 493506
    num_steps_sampled: 493506
    num_steps_trained: 493506
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,247,6427.28,493506,20.7822,21.05,18.46,98.73


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 495504
  custom_metrics: {}
  date: 2021-11-13_23-17-05
  done: false
  episode_len_mean: 98.89
  episode_media: {}
  episode_reward_max: 21.049999999999937
  episode_reward_mean: 20.733099999999922
  episode_reward_min: 14.55000000000002
  episodes_this_iter: 20
  episodes_total: 4881
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3273874095508031
          entropy_coeff: 0.009999999999999998
          kl: 0.018444023030030128
          policy_loss: 0.002976304931300027
          total_loss: 0.06324490464869
          vf_explained_var: 0.9980639219284058
          vf_loss: 0.06887383024829129
    num_agent_steps_sampled: 495504
    num_agent_steps_trained: 495504
    num_steps_sampled: 495504
    num_steps_trained: 495504
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,248,6449.85,495504,20.7331,21.05,14.55,98.89


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 497502
  custom_metrics: {}
  date: 2021-11-13_23-17-27
  done: false
  episode_len_mean: 98.51
  episode_media: {}
  episode_reward_max: 21.049999999999937
  episode_reward_mean: 20.741399999999928
  episode_reward_min: 14.55000000000002
  episodes_this_iter: 19
  episodes_total: 4900
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3322782465389797
          entropy_coeff: 0.009999999999999998
          kl: 0.011269374006171526
          policy_loss: -0.008409569235075088
          total_loss: 0.037358125902357556
          vf_explained_var: 0.9983753561973572
          vf_loss: 0.05623791565054229
    num_agent_steps_sampled: 497502
    num_agent_steps_trained: 497502
    num_steps_sampled: 497502
    num_steps_trained: 497502
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,249,6471.94,497502,20.7414,21.05,14.55,98.51


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 499500
  custom_metrics: {}
  date: 2021-11-13_23-17-51
  done: false
  episode_len_mean: 98.16
  episode_media: {}
  episode_reward_max: 21.049999999999937
  episode_reward_mean: 20.78359999999993
  episode_reward_min: 14.55000000000002
  episodes_this_iter: 21
  episodes_total: 4921
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2484339941115605
          entropy_coeff: 0.009999999999999998
          kl: 0.018546765950736285
          policy_loss: -0.017956489679359255
          total_loss: 0.0050440182998066855
          vf_explained_var: 0.9991536736488342
          vf_loss: 0.030790197835969074
    num_agent_steps_sampled: 499500
    num_agent_steps_trained: 499500
    num_steps_sampled: 499500
    num_steps_trained: 499500
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,250,6495.61,499500,20.7836,21.05,14.55,98.16


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 501498
  custom_metrics: {}
  date: 2021-11-13_23-18-13
  done: false
  episode_len_mean: 98.05
  episode_media: {}
  episode_reward_max: 21.07999999999994
  episode_reward_mean: 20.80939999999993
  episode_reward_min: 14.55000000000002
  episodes_this_iter: 21
  episodes_total: 4942
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3167400621232532
          entropy_coeff: 0.009999999999999998
          kl: 0.015646985410088868
          policy_loss: -0.031543408511649995
          total_loss: -0.03384772173705555
          vf_explained_var: 0.9997953772544861
          vf_loss: 0.006902445902648781
    num_agent_steps_sampled: 501498
    num_agent_steps_trained: 501498
    num_steps_sampled: 501498
    num_steps_trained: 501498
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,251,6518.09,501498,20.8094,21.08,14.55,98.05


Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 503496
  custom_metrics: {}
  date: 2021-11-13_23-18-39
  done: false
  episode_len_mean: 98.92
  episode_media: {}
  episode_reward_max: 21.07999999999994
  episode_reward_mean: 20.790999999999922
  episode_reward_min: 14.55000000000002
  episodes_this_iter: 20
  episodes_total: 4962
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3494889599936348
          entropy_coeff: 0.009999999999999998
          kl: 0.010450608094652451
          policy_loss: -0.018639817869379408
          total_loss: -0.021719496395616305
          vf_explained_var: 0.9997652173042297
          vf_loss: 0.007769900234416127
    num_agent_steps_sampled: 503496
    num_agent_steps_trained: 503496
    num_steps_sampled: 503496
    num_steps_trained: 503496
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,252,6543.35,503496,20.791,21.08,14.55,98.92




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 505494
  custom_metrics: {}
  date: 2021-11-13_23-19-19
  done: false
  episode_len_mean: 98.33
  episode_media: {}
  episode_reward_max: 21.07999999999994
  episode_reward_mean: 20.63099999999992
  episode_reward_min: 1.94
  episodes_this_iter: 20
  episodes_total: 4982
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2613456538745336
          entropy_coeff: 0.009999999999999998
          kl: 0.014889110294309373
          policy_loss: 1.7660146667843774e-05
          total_loss: 0.752892008193192
          vf_explained_var: 0.9788253903388977
          vf_loss: 0.7617189928623183
    num_agent_steps_sampled: 505494
    num_agent_steps_trained: 505494
    num_steps_sampled: 505494
    num_steps_trained: 505494
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,253,6584.06,505494,20.631,21.08,1.94,98.33




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 507492
  custom_metrics: {}
  date: 2021-11-13_23-19-59
  done: false
  episode_len_mean: 97.88
  episode_media: {}
  episode_reward_max: 21.07999999999994
  episode_reward_mean: 20.42709999999992
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 22
  episodes_total: 5004
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3304259964397975
          entropy_coeff: 0.009999999999999998
          kl: 0.006856749182977698
          policy_loss: 4.055843289409365e-05
          total_loss: 0.7725613987173087
          vf_explained_var: 0.9780187010765076
          vf_loss: 0.7840895112958692
    num_agent_steps_sampled: 507492
    num_agent_steps_trained: 507492
    num_steps_sampled: 507492
    num_steps_trained: 507492
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,254,6623.43,507492,20.4271,21.08,-0.06,97.88




Result for PPO_my_env_c1ec9_00000:
  agent_timesteps_total: 509490
  custom_metrics: {}
  date: 2021-11-13_23-20-37
  done: false
  episode_len_mean: 97.26
  episode_media: {}
  episode_reward_max: 21.07999999999994
  episode_reward_mean: 20.21819999999992
  episode_reward_min: -0.060000000000000005
  episodes_this_iter: 20
  episodes_total: 5024
  experiment_id: 3471556cf6b5442e8ddf461993f1dbed
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3472122016407195
          entropy_coeff: 0.009999999999999998
          kl: 0.004030742534568872
          policy_loss: 0.00155059899247828
          total_loss: 0.6336043970304586
          vf_explained_var: 0.981549084186554
          vf_loss: 0.6445056505856059
    num_agent_steps_sampled: 509490
    num_agent_steps_trained: 509490
    num_steps_sampled: 509490
    num_steps_trained: 509490
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_c1ec9_00000,RUNNING,192.168.3.5:97382,255,6661.89,509490,20.2182,21.08,-0.06,97.26
