In [1]:
import torch 
from torch import nn

import ray
from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

#from models import VisualEncoder
from train import *
from wrappers_2 import *



In [2]:
class VisualEncoder(nn.Module):
    """Convolutional image encoder that flattens its final feature map.

    The network is a stack of six ``Conv2d`` layers (kernel 2, stride 2, no
    padding), each followed by ``ELU``, and a trailing ``Flatten``. Every
    convolution halves the spatial resolution, so a 64x64 RGB input is reduced
    to a 1x1 map with 512 channels, i.e. a 512-dimensional vector per sample.

    The layers live in ``self.cnn`` in the order conv, ELU, conv, ELU, ...,
    Flatten, so convolution parameters are stored under the ``state_dict``
    keys ``cnn.0``, ``cnn.2``, ..., ``cnn.10``.
    """

    def __init__(self):
        super().__init__()

        # Channel count before the first convolution and after each one.
        widths = (3, 32, 32, 64, 128, 256, 512)

        stages = []
        for in_channels, out_channels in zip(widths[:-1], widths[1:]):
            stages.append(
                nn.Conv2d(in_channels, out_channels, kernel_size=2, stride=2, padding=0)
            )
            stages.append(nn.ELU())
        stages.append(nn.Flatten())

        self.cnn = nn.Sequential(*stages)

    def forward(self, x):
        """Encode a batch of images ``x`` and return the flattened features."""
        return self.cnn(x)

In [3]:
from torch.nn.functional import one_hot

class MyModelClass(TorchModelV2, nn.Module):
    """RLlib Torch model with a pretrained image encoder and a target-grid encoder.

    The forward pass
      1. encodes ``obs['pov']`` with ``VisualEncoder`` (weights loaded from
         disk, evaluated under ``torch.no_grad()`` so no gradient reaches it),
      2. one-hot encodes ``obs['target_grid']`` into 7 classes and reduces the
         class axis to a single channel with a 1x1x1 ``Conv3d``,
      3. concatenates both feature vectors and feeds them through an MLP trunk,
      4. produces action logits (``action_head``) and a state value
         (``value_head``) from the trunk output.

    Args:
        obs_space, action_space, num_outputs, model_config, name: forwarded
            unchanged to ``TorchModelV2.__init__``. ``action_space.n`` sets the
            number of action logits.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)
        visual_features_dim = 512
        # One scalar per target-grid cell after the class axis is reduced.
        target_features_dim = 9 * 11 * 11
        self.visual_encoder = VisualEncoder()
        # Weights are always loaded onto the CPU first; they are moved to the
        # GPU below when one is available.
        self.visual_encoder.load_state_dict(
            torch.load("/IGLU-Minecraft/models/AngelaCNN/encoder_weigths.pth", map_location=torch.device('cpu'))
        )
        self.target_encoder = nn.Sequential(
            nn.Conv3d(7, 1, kernel_size=1, stride=1, padding=0),
            nn.ELU(),
        )
        policy_hidden_dim = 256
        self.policy_network = nn.Sequential(
            nn.Linear(visual_features_dim + target_features_dim, 512),
            nn.ELU(),
            nn.Linear(512, policy_hidden_dim),
            nn.ELU(),
            nn.Linear(policy_hidden_dim, policy_hidden_dim),
            nn.ELU(),
            nn.Linear(policy_hidden_dim, policy_hidden_dim),
            nn.ELU(),
            nn.Linear(policy_hidden_dim, policy_hidden_dim),
            nn.ELU(),
        )
        self.action_head = nn.Linear(policy_hidden_dim, action_space.n)
        self.value_head = nn.Linear(policy_hidden_dim, 1)
        # Value estimate of the most recent forward() call; read by value_function().
        self.last_value = None

        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            self.visual_encoder.cuda()
            self.target_encoder.cuda()
            self.policy_network.cuda()
            self.action_head.cuda()
            self.value_head.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Compute action logits and cache the value estimate.

        Args:
            input_dict: must contain ``'obs'`` with tensor entries ``'pov'``
                and ``'target_grid'``. ``pov`` is permuted with
                ``(0, 3, 1, 2)`` and divided by 255, so it is assumed to be a
                channel-last image batch with values in 0..255 - TODO confirm
                against the environment wrapper.
            state: passed through unchanged.
            seq_lens: unused.

        Returns:
            ``(logits, state)`` where ``logits`` comes from ``action_head``.
        """
        obs = input_dict['obs']
        # Tensor.cuda()/Tensor.to() return a new tensor instead of moving the
        # tensor in place, so the result has to be assigned. Use whatever
        # device the parameters currently live on, so this stays correct even
        # if the model is moved after construction.
        device = next(self.parameters()).device
        pov = obs['pov'].to(device).permute(0, 3, 1, 2).float() / 255.0
        target = (
            one_hot(obs['target_grid'].to(device).long(), num_classes=7)
            .permute(0, 4, 1, 2, 3)  # move the one-hot class axis to the channel position
            .float()
        )

        # The pretrained encoder is used as a fixed feature extractor.
        with torch.no_grad():
            visual_features = self.visual_encoder(pov)

        target_features = self.target_encoder(target)
        target_features = target_features.reshape(target_features.shape[0], -1)
        features = torch.cat([visual_features, target_features], dim=1)
        features = self.policy_network(features)
        action = self.action_head(features)
        self.last_value = self.value_head(features).squeeze(1)
        return action, state

    @override(TorchModelV2)
    def value_function(self):
        """Return the value estimate cached by the last ``forward()`` call.

        Raises:
            AssertionError: if ``forward()`` has not been called yet.
        """
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

In [4]:
# Register MyModelClass under the name that the trainer config's
# "custom_model" entry refers to.
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [5]:
class VisualObservationWrapper(ObsWrapper):
    """Observation wrapper exposing the image, inventory, compass and (optionally) the target grid.

    Args:
        env: environment to wrap.
        include_target: when true, ``'target_grid'`` is declared in
            ``observation_space`` and included in every observation. When
            false, the key is omitted from both, so returned observations
            always match the declared space.
    """

    def __init__(self, env, include_target=False):
        super().__init__(env)
        self.include_target = include_target
        spaces = {
            'pov': gym.spaces.Box(low=0, high=255, shape=(64, 64, 3)),
            'inventory': gym.spaces.Box(low=0.0, high=20.0, shape=(6,)),
            'compass': gym.spaces.Box(low=-180.0, high=180.0, shape=(1,))
        }
        if include_target:
            spaces['target_grid'] = \
                gym.spaces.Box(low=0, high=6, shape=(9, 11, 11))
        self.observation_space = gym.spaces.Dict(spaces)

    def observation(self, obs, reward=None, done=None, info=None):
        """Convert a raw observation into the dict described by ``observation_space``.

        The target grid is taken from ``info['target_grid']`` when present (the
        key is removed from ``info``). If ``info`` is given without that key,
        the situation is logged, a reset is requested when the unwrapped env
        supports ``should_reset``, and the grid of the current task is used.
        If ``info`` is ``None`` the grid of the current task is used directly.

        Args:
            obs: raw observation with ``'pov'``, ``'inventory'`` and
                ``'compass'`` (the latter holding an ``'angle'`` entry).
            reward, done: unused.
            info: optional step info dict; may be mutated as described above.

        Returns:
            dict with ``'pov'`` (float32), ``'inventory'``, ``'compass'`` and,
            only when ``include_target`` was set, ``'target_grid'``.
        """
        if info is not None:
            if 'target_grid' in info:
                target_grid = info['target_grid']
                del info['target_grid']
            else:
                logger.error(f'info: {info}')
                if hasattr(self.unwrapped, 'should_reset'):
                    self.unwrapped.should_reset(True)
                target_grid = self.env.unwrapped.tasks.current.target_grid
        else:
            target_grid = self.env.unwrapped.tasks.current.target_grid

        result = {
            'pov': obs['pov'].astype(np.float32),
            'inventory': obs['inventory'],
            'compass': np.array([obs['compass']['angle'].item()]),
        }
        # Only emit the key when the observation space declares it; otherwise
        # the observation would not be a member of observation_space.
        if self.include_target:
            result['target_grid'] = target_grid
        return result

In [6]:
import os
# Number CUDA devices by PCI bus id so that index "0" below refers to a fixed
# physical GPU.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
# Expose only GPU 0 to this process and to processes that inherit its environment.
# NOTE(review): this only takes effect if it runs before CUDA is initialised - confirm cell order.
os.environ["CUDA_VISIBLE_DEVICES"]="0"

class RewardWrapper(gym.RewardWrapper):
    """Reward wrapper that turns every zero reward into a penalty of -0.01.

    Non-zero rewards are passed through unchanged.
    """

    def __init__(self, env):
        super().__init__(env)

    def reward(self, rew):
        """Return ``-0.01`` when ``rew`` equals 0, otherwise ``rew`` itself."""
        return -0.01 if rew == 0 else rew
    
def env_creator(env_config):
    """Build the wrapped 'IGLUSilentBuilder-v0' environment used for training.

    ``env_config`` is accepted to satisfy the env-creator signature but is not
    read. The base env is created with ``max_steps=1000``, restricted to a
    fixed preset of tasks, and then wrapped (innermost first) with
    ``VisualObservationWrapper`` (target grid included), ``SelectAndPlace``,
    ``Discretization`` over the 'human-level' flat action space, and
    ``RewardWrapper``.
    """
    task_ids = ['C3', 'C17', 'C20', 'C22', 'C32', 'C40', 'C85', 'C87', 'C93']

    env = gym.make('IGLUSilentBuilder-v0', max_steps=1000)
    env.update_taskset(TaskSet(preset=task_ids))

    # Wrappers in application order; each callable takes the env built so far.
    # (PovOnlyWrapper was previously tried in place of the first entry.)
    pipeline = (
        lambda inner: VisualObservationWrapper(inner, include_target=True),
        SelectAndPlace,
        lambda inner: Discretization(inner, flat_action_space('human-level')),
        RewardWrapper,
    )
    for wrap in pipeline:
        env = wrap(env)
    return env

from ray.tune.registry import register_env
# Register env_creator under the name used for the "env" key in the trainer config.
register_env("my_env", env_creator)

from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

In [None]:
from ray.tune.integration.wandb import WandbLogger

# Launch PPO training through Tune. No `stop` criterion is passed here, so
# nothing in this call bounds the length of the run.
analysis = tune.run(PPOTrainer, 
         config={
             # Environment registered above via register_env.
             "env": "my_env", 
             "framework": "torch",
             "num_gpus": 1,
             "num_workers": 1,
             # PPO hyperparameters (see the RLlib PPO documentation for exact semantics).
             "sgd_minibatch_size": 256,
             "clip_param": 0.2,
             "entropy_coeff": 0.01,
             "lambda": 0.95,
             # Matches the 1000-step increments of num_steps_sampled in the run log.
             "train_batch_size": 1000,
             #"gamma": 0.99,
             "model": {
                    # Specify our custom model from above.
                    "custom_model": "my_torch_model",
                    # Extra kwargs to be passed to your model's c'tor.
                    "custom_model_config": {},
              },
             # Settings picked up by the WandbLogger passed in `loggers` below.
             "logger_config": {
                  "wandb": {
                      "project": "IGLU-Minecraft",
                      "name": "PPO MultiTask <=10 pretrained (AngelaCNN + changed policy) (3 noops after placement) r: -0.01"
                  }
              }

        },
        loggers=[WandbLogger])



Trial name,status,loc
PPO_my_env_4b4a2_00000,PENDING,


2021-10-24 13:21:27,928	INFO wandb.py:170 -- Already logged into W&B.
2021-10-24 13:21:27,939	ERROR syncer.py:72 -- Log sync requires rsync to be installed.
[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.5 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2m[36m(pid=380543)[0m 2021-10-24 13:21:31,457	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=380543)[0m 2021-10-24 13:21:31,457	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 1000
  custom_metrics: {}
  date: 2021-10-24_13-22-45
  done: false
  episode_len_mean: 407.5
  episode_media: {}
  episode_reward_max: -4.189999999999955
  episode_reward_mean: -9.379999999999951
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 2
  episodes_total: 2
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.88430388768514
          entropy_coeff: 0.009999999999999998
          kl: 0.005359336429532312
          policy_loss: 0.1058047072754966
          total_loss: 0.3807262102762858
          vf_explained_var: -0.13551174104213715
          vf_loss: 0.30269267728435806
    num_agent_steps_sampled: 1000
    num_agent_steps_trained: 1000
    num_steps_sampled: 1000
    num_steps_trained: 1000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,1,68.3311,1000,-9.38,-4.19,-14.57,407.5


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 2000
  custom_metrics: {}
  date: 2021-10-24_13-23-13
  done: false
  episode_len_mean: 409.25
  episode_media: {}
  episode_reward_max: -4.089999999999957
  episode_reward_mean: -6.744999999999954
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 2
  episodes_total: 4
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.880343461036682
          entropy_coeff: 0.009999999999999998
          kl: 0.0038223870718520118
          policy_loss: 0.03334708702233102
          total_loss: 0.21720773958497577
          vf_explained_var: -0.259689062833786
          vf_loss: 0.21189961622779568
    num_agent_steps_sampled: 2000
    num_agent_steps_trained: 2000
    num_steps_sampled: 2000
    num_steps_trained: 2000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,2,95.7986,2000,-6.745,-4.09,-14.57,409.25


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 3000
  custom_metrics: {}
  date: 2021-10-24_13-23-38
  done: false
  episode_len_mean: 411.42857142857144
  episode_media: {}
  episode_reward_max: -4.089999999999957
  episode_reward_mean: -6.1671428571428075
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 3
  episodes_total: 7
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.8637402693430585
          entropy_coeff: 0.009999999999999998
          kl: 0.007777223787296271
          policy_loss: 0.08529037208192879
          total_loss: 0.06441561614887582
          vf_explained_var: 0.3617481589317322
          vf_loss: 0.0069849242068206275
    num_agent_steps_sampled: 3000
    num_agent_steps_trained: 3000
    num_steps_sampled: 3000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,3,120.721,3000,-6.16714,-4.09,-14.57,411.429


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 4000
  custom_metrics: {}
  date: 2021-10-24_13-24-02
  done: false
  episode_len_mean: 415.6666666666667
  episode_media: {}
  episode_reward_max: -4.0299999999999585
  episode_reward_mean: -5.753333333333284
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 2
  episodes_total: 9
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.8657065709431966
          entropy_coeff: 0.009999999999999998
          kl: 0.0029981814722072286
          policy_loss: 0.051370571470922895
          total_loss: 0.028304285638862187
          vf_explained_var: 0.3005284368991852
          vf_loss: 0.005290961389740308
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_steps_sampled: 4000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,4,144.479,4000,-5.75333,-4.03,-14.57,415.667


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 5000
  custom_metrics: {}
  date: 2021-10-24_13-24-26
  done: false
  episode_len_mean: 412.9166666666667
  episode_media: {}
  episode_reward_max: -3.8699999999999615
  episode_reward_mean: -5.891666666666617
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 3
  episodes_total: 12
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.8637919452455307
          entropy_coeff: 0.009999999999999998
          kl: 0.009324571621031967
          policy_loss: 0.02014780773056878
          total_loss: 0.08817347702052858
          vf_explained_var: 0.29073312878608704
          vf_loss: 0.09619736303057935
    num_agent_steps_sampled: 5000
    num_agent_steps_trained: 5000
    num_steps_sampled: 5000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,5,169.218,5000,-5.89167,-3.87,-14.57,412.917


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 6000
  custom_metrics: {}
  date: 2021-10-24_13-24-50
  done: false
  episode_len_mean: 412.0
  episode_media: {}
  episode_reward_max: -3.8699999999999615
  episode_reward_mean: -5.6307142857142365
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 2
  episodes_total: 14
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.857915931277805
          entropy_coeff: 0.009999999999999998
          kl: 0.0067264830165445835
          policy_loss: -0.07643999200728205
          total_loss: -0.09400515688790215
          vf_explained_var: 0.15042069554328918
          vf_loss: 0.010677671308318775
    num_agent_steps_sampled: 6000
    num_agent_steps_trained: 6000
    num_steps_sampled: 6000
    num_steps_trained: 6000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,6,192.474,6000,-5.63071,-3.87,-14.57,412


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 7000
  custom_metrics: {}
  date: 2021-10-24_13-25-14
  done: false
  episode_len_mean: 410.88235294117646
  episode_media: {}
  episode_reward_max: -3.8699999999999615
  episode_reward_mean: -5.352941176470542
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 3
  episodes_total: 17
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.86157689359453
          entropy_coeff: 0.009999999999999998
          kl: 0.00586386673848417
          policy_loss: 0.014785119808382458
          total_loss: -0.004635833617713716
          vf_explained_var: -0.07290228456258774
          vf_loss: 0.008901621838514175
    num_agent_steps_sampled: 7000
    num_agent_steps_trained: 7000
    num_steps_sampled: 7000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,7,216.718,7000,-5.35294,-3.87,-14.57,410.882


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 8000
  custom_metrics: {}
  date: 2021-10-24_13-25-36
  done: false
  episode_len_mean: 410.4736842105263
  episode_media: {}
  episode_reward_max: -3.8699999999999615
  episode_reward_mean: -5.217894736842059
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 2
  episodes_total: 19
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.8467625591490004
          entropy_coeff: 0.009999999999999998
          kl: 0.0054967777837697206
          policy_loss: 0.09801394848359955
          total_loss: 0.07302356039484342
          vf_explained_var: 0.7789921164512634
          vf_loss: 0.0032023975632101713
    num_agent_steps_sampled: 8000
    num_agent_steps_trained: 8000
    num_steps_sampled: 8000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,8,239.14,8000,-5.21789,-3.87,-14.57,410.474


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 9000
  custom_metrics: {}
  date: 2021-10-24_13-25-57
  done: false
  episode_len_mean: 411.76190476190476
  episode_media: {}
  episode_reward_max: -3.8699999999999615
  episode_reward_mean: -5.124761904761859
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 2
  episodes_total: 21
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.802092525694105
          entropy_coeff: 0.009999999999999998
          kl: 0.009422847857244222
          policy_loss: -0.05939520647128423
          total_loss: -0.0815585041211711
          vf_explained_var: 0.7241853475570679
          vf_loss: 0.00538648307038885
    num_agent_steps_sampled: 9000
    num_agent_steps_trained: 9000
    num_steps_sampled: 9000
    num_steps_trained: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,9,259.785,9000,-5.12476,-3.87,-14.57,411.762


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 10000
  custom_metrics: {}
  date: 2021-10-24_13-26-16
  done: false
  episode_len_mean: 416.2916666666667
  episode_media: {}
  episode_reward_max: -3.8699999999999615
  episode_reward_mean: -5.044166666666619
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 3
  episodes_total: 24
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.79072376092275
          entropy_coeff: 0.009999999999999998
          kl: 0.008623493259970121
          policy_loss: -0.06873717043134901
          total_loss: -0.09117124279340108
          vf_explained_var: 0.7958502173423767
          vf_loss: 0.005041990479609618
    num_agent_steps_sampled: 10000
    num_agent_steps_trained: 10000
    num_steps_sampled: 10000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,10,278.93,10000,-5.04417,-3.87,-14.57,416.292


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 11000
  custom_metrics: {}
  date: 2021-10-24_13-26-36
  done: false
  episode_len_mean: 418.9230769230769
  episode_media: {}
  episode_reward_max: -3.8699999999999615
  episode_reward_mean: -5.002692307692261
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 2
  episodes_total: 26
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.7891933467653063
          entropy_coeff: 0.009999999999999998
          kl: 0.008649930648380138
          policy_loss: 0.06427860044770771
          total_loss: 0.04022775474521849
          vf_explained_var: 0.8645550012588501
          vf_loss: 0.003408590362070956
    num_agent_steps_sampled: 11000
    num_agent_steps_trained: 11000
    num_steps_sampled: 11000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,11,298.331,11000,-5.00269,-3.87,-14.57,418.923


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 12000
  custom_metrics: {}
  date: 2021-10-24_13-26-55
  done: false
  episode_len_mean: 419.7857142857143
  episode_media: {}
  episode_reward_max: -3.8699999999999615
  episode_reward_mean: -4.953214285714238
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 2
  episodes_total: 28
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.7519490480422975
          entropy_coeff: 0.009999999999999998
          kl: 0.008931262962184543
          policy_loss: -0.03598511914412181
          total_loss: -0.05621576234698296
          vf_explained_var: 0.6135743856430054
          vf_loss: 0.006842284664485811
    num_agent_steps_sampled: 12000
    num_agent_steps_trained: 12000
    num_steps_sampled: 12000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,12,317.303,12000,-4.95321,-3.87,-14.57,419.786




Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 13000
  custom_metrics: {}
  date: 2021-10-24_13-27-30
  done: false
  episode_len_mean: 421.9
  episode_media: {}
  episode_reward_max: -3.8699999999999615
  episode_reward_mean: -4.9239999999999515
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 2
  episodes_total: 30
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.7551762660344443
          entropy_coeff: 0.009999999999999998
          kl: 0.010052299350094293
          policy_loss: -0.11716279453701443
          total_loss: -0.13601798208223448
          vf_explained_var: 0.5371256470680237
          vf_loss: 0.008193958817153341
    num_agent_steps_sampled: 13000
    num_agent_steps_trained: 13000
    num_steps_sampled: 13000
    num_steps_trained: 13000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,13,352.541,13000,-4.924,-3.87,-14.57,421.9


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 14000
  custom_metrics: {}
  date: 2021-10-24_13-27-54
  done: false
  episode_len_mean: 425.59375
  episode_media: {}
  episode_reward_max: -3.8699999999999615
  episode_reward_mean: -4.916874999999951
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 2
  episodes_total: 32
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.7047678258683945
          entropy_coeff: 0.009999999999999998
          kl: 0.011030154584458766
          policy_loss: -0.12582069900300769
          total_loss: -0.1439033266570833
          vf_explained_var: 0.3569977283477783
          vf_loss: 0.008413546573299553
    num_agent_steps_sampled: 14000
    num_agent_steps_trained: 14000
    num_steps_sampled: 14000
    num_steps_trained: 1400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,14,376.43,14000,-4.91687,-3.87,-14.57,425.594


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 15000
  custom_metrics: {}
  date: 2021-10-24_13-28-15
  done: false
  episode_len_mean: 429.1470588235294
  episode_media: {}
  episode_reward_max: -3.8699999999999615
  episode_reward_mean: -4.913529411764657
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 2
  episodes_total: 34
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.690902331140306
          entropy_coeff: 0.009999999999999998
          kl: 0.006897922081324411
          policy_loss: -0.11341940859953563
          total_loss: -0.13096090919441647
          vf_explained_var: 0.3277705907821655
          vf_loss: 0.009022625293194626
    num_agent_steps_sampled: 15000
    num_agent_steps_trained: 15000
    num_steps_sampled: 15000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,15,397.515,15000,-4.91353,-3.87,-14.57,429.147


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2021-10-24_13-28-37
  done: false
  episode_len_mean: 429.4864864864865
  episode_media: {}
  episode_reward_max: -3.8699999999999615
  episode_reward_mean: -4.866486486486437
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 3
  episodes_total: 37
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.7218888176812066
          entropy_coeff: 0.009999999999999998
          kl: 0.013307053525752366
          policy_loss: 0.046900510208474265
          total_loss: 0.0299309813314014
          vf_explained_var: 0.2387276142835617
          vf_loss: 0.009584005061899208
    num_agent_steps_sampled: 16000
    num_agent_steps_trained: 16000
    num_steps_sampled: 16000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,16,419.403,16000,-4.86649,-3.87,-14.57,429.486


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 17000
  custom_metrics: {}
  date: 2021-10-24_13-28-59
  done: false
  episode_len_mean: 429.2564102564103
  episode_media: {}
  episode_reward_max: -3.8699999999999615
  episode_reward_mean: -4.834871794871745
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 2
  episodes_total: 39
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.762646926773919
          entropy_coeff: 0.009999999999999998
          kl: 0.009849403061103167
          policy_loss: -0.051389231118890974
          total_loss: -0.06848162727223503
          vf_explained_var: 0.06664454191923141
          vf_loss: 0.010041599159335925
    num_agent_steps_sampled: 17000
    num_agent_steps_trained: 17000
    num_steps_sampled: 17000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,17,441.175,17000,-4.83487,-3.87,-14.57,429.256


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 18000
  custom_metrics: {}
  date: 2021-10-24_13-29-21
  done: false
  episode_len_mean: 428.4761904761905
  episode_media: {}
  episode_reward_max: -3.8699999999999615
  episode_reward_mean: -4.788333333333284
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 3
  episodes_total: 42
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.757556094063653
          entropy_coeff: 0.009999999999999998
          kl: 0.007747989675264528
          policy_loss: -0.0035424255662494237
          total_loss: -0.021361231472757126
          vf_explained_var: 0.20351792871952057
          vf_loss: 0.009369354958309688
    num_agent_steps_sampled: 18000
    num_agent_steps_trained: 18000
    num_steps_sampled: 18000
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,18,463.849,18000,-4.78833,-3.87,-14.57,428.476


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 19000
  custom_metrics: {}
  date: 2021-10-24_13-29-45
  done: false
  episode_len_mean: 426.47727272727275
  episode_media: {}
  episode_reward_max: -3.8299999999999623
  episode_reward_mean: -4.745454545454496
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 2
  episodes_total: 44
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.7390783071517943
          entropy_coeff: 0.009999999999999998
          kl: 0.007586226554122611
          policy_loss: -0.04782730109161801
          total_loss: -0.06543690727816688
          vf_explained_var: 0.44524943828582764
          vf_loss: 0.009401864207272107
    num_agent_steps_sampled: 19000
    num_agent_steps_trained: 19000
    num_steps_sampled: 19000
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,19,487.456,19000,-4.74545,-3.83,-14.57,426.477


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-10-24_13-30-08
  done: false
  episode_len_mean: 423.51063829787233
  episode_media: {}
  episode_reward_max: -3.7799999999999634
  episode_reward_mean: -4.685106382978676
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 3
  episodes_total: 47
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.64856805006663
          entropy_coeff: 0.009999999999999998
          kl: 0.010768824265975536
          policy_loss: 0.04829160703553094
          total_loss: 0.032799794773260754
          vf_explained_var: 0.20038259029388428
          vf_loss: 0.010455425841630332
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_steps_sampled: 20000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,20,510.883,20000,-4.68511,-3.78,-14.57,423.511


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 21000
  custom_metrics: {}
  date: 2021-10-24_13-30-33
  done: false
  episode_len_mean: 421.6734693877551
  episode_media: {}
  episode_reward_max: -3.699999999999965
  episode_reward_mean: -4.648367346938729
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 2
  episodes_total: 49
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.660336396429274
          entropy_coeff: 0.009999999999999998
          kl: 0.014142205701428523
          policy_loss: -0.1246736056274838
          total_loss: -0.13928459021780226
          vf_explained_var: 0.15699106454849243
          vf_loss: 0.011285273222407946
    num_agent_steps_sampled: 21000
    num_agent_steps_trained: 21000
    num_steps_sampled: 21000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,21,535.505,21000,-4.64837,-3.7,-14.57,421.673


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 22000
  custom_metrics: {}
  date: 2021-10-24_13-30-56
  done: false
  episode_len_mean: 419.5
  episode_media: {}
  episode_reward_max: -3.6699999999999657
  episode_reward_mean: -4.601730769230723
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 3
  episodes_total: 52
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.6243520471784803
          entropy_coeff: 0.009999999999999998
          kl: 0.012735348652799771
          policy_loss: 0.02514666650030348
          total_loss: 0.009611735824081633
          vf_explained_var: 0.20497895777225494
          vf_loss: 0.010071825625426653
    num_agent_steps_sampled: 22000
    num_agent_steps_trained: 22000
    num_steps_sampled: 22000
    num_steps_trained: 22000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,22,558.655,22000,-4.60173,-3.67,-14.57,419.5


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 23000
  custom_metrics: {}
  date: 2021-10-24_13-31-19
  done: false
  episode_len_mean: 417.92727272727274
  episode_media: {}
  episode_reward_max: -3.6699999999999657
  episode_reward_mean: -4.563818181818136
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 3
  episodes_total: 55
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.6514514499240454
          entropy_coeff: 0.009999999999999998
          kl: 0.0113813852137485
          policy_loss: -0.0029149037268426685
          total_loss: -0.01758501728375753
          vf_explained_var: 0.142608180642128
          vf_loss: 0.011275332272958218
    num_agent_steps_sampled: 23000
    num_agent_steps_trained: 23000
    num_steps_sampled: 23000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,23,581.086,23000,-4.56382,-3.67,-14.57,417.927


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 24000
  custom_metrics: {}
  date: 2021-10-24_13-31-40
  done: false
  episode_len_mean: 417.5964912280702
  episode_media: {}
  episode_reward_max: -3.6699999999999657
  episode_reward_mean: -4.547017543859603
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 2
  episodes_total: 57
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.711485113037957
          entropy_coeff: 0.009999999999999998
          kl: 0.011967323323325414
          policy_loss: 0.02315625962283876
          total_loss: 0.001988286276658376
          vf_explained_var: 0.18542155623435974
          vf_loss: 0.005348510390548553
    num_agent_steps_sampled: 24000
    num_agent_steps_trained: 24000
    num_steps_sampled: 24000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,24,602.304,24000,-4.54702,-3.67,-14.57,417.596




Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 25000
  custom_metrics: {}
  date: 2021-10-24_13-32-17
  done: false
  episode_len_mean: 416.23333333333335
  episode_media: {}
  episode_reward_max: -3.569999999999968
  episode_reward_mean: -4.514833333333288
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 3
  episodes_total: 60
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.6623299333784316
          entropy_coeff: 0.009999999999999998
          kl: 0.011929440511884614
          policy_loss: 0.09371734244955911
          total_loss: 0.07371187491549386
          vf_explained_var: 0.4078744351863861
          vf_loss: 0.006021360308255276
    num_agent_steps_sampled: 25000
    num_agent_steps_trained: 25000
    num_steps_sampled: 25000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,25,639.342,25000,-4.51483,-3.57,-14.57,416.233


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 26000
  custom_metrics: {}
  date: 2021-10-24_13-32-39
  done: false
  episode_len_mean: 415.9032258064516
  episode_media: {}
  episode_reward_max: -3.569999999999968
  episode_reward_mean: -4.500161290322535
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 2
  episodes_total: 62
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.650097046958076
          entropy_coeff: 0.009999999999999998
          kl: 0.008321047345163812
          policy_loss: 0.045744640131791435
          total_loss: 0.026641113228268093
          vf_explained_var: 0.21772433817386627
          vf_loss: 0.006981389261879182
    num_agent_steps_sampled: 26000
    num_agent_steps_trained: 26000
    num_steps_sampled: 26000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,26,661.437,26000,-4.50016,-3.57,-14.57,415.903


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 27000
  custom_metrics: {}
  date: 2021-10-24_13-33-03
  done: false
  episode_len_mean: 414.0
  episode_media: {}
  episode_reward_max: -3.569999999999968
  episode_reward_mean: -4.465384615384571
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 3
  episodes_total: 65
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.5693982601165772
          entropy_coeff: 0.009999999999999998
          kl: 0.010688512164251858
          policy_loss: 0.048386731412675646
          total_loss: 0.03581677857372496
          vf_explained_var: 0.23517434298992157
          vf_loss: 0.012589600684198862
    num_agent_steps_sampled: 27000
    num_agent_steps_trained: 27000
    num_steps_sampled: 27000
    num_steps_trained: 27000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,27,685.285,27000,-4.46538,-3.57,-14.57,414


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 28000
  custom_metrics: {}
  date: 2021-10-24_13-33-28
  done: false
  episode_len_mean: 411.4852941176471
  episode_media: {}
  episode_reward_max: -3.4999999999999694
  episode_reward_mean: -4.425882352941132
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 3
  episodes_total: 68
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.605496581395467
          entropy_coeff: 0.009999999999999998
          kl: 0.009836199851868333
          policy_loss: 0.03624683262573348
          total_loss: 0.02241532024410036
          vf_explained_var: 0.10481560230255127
          vf_loss: 0.01173164223631223
    num_agent_steps_sampled: 28000
    num_agent_steps_trained: 28000
    num_steps_sampled: 28000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,28,709.918,28000,-4.42588,-3.5,-14.57,411.485


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 29000
  custom_metrics: {}
  date: 2021-10-24_13-33-51
  done: false
  episode_len_mean: 410.24285714285713
  episode_media: {}
  episode_reward_max: -3.4999999999999694
  episode_reward_mean: -4.404571428571384
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 2
  episodes_total: 70
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.622637814945645
          entropy_coeff: 0.009999999999999998
          kl: 0.011814533246134474
          policy_loss: -0.10158421380652322
          total_loss: -0.11462485094865163
          vf_explained_var: 0.28448349237442017
          vf_loss: 0.012595017590663499
    num_agent_steps_sampled: 29000
    num_agent_steps_trained: 29000
    num_steps_sampled: 29000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,29,733.605,29000,-4.40457,-3.5,-14.57,410.243


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 30000
  custom_metrics: {}
  date: 2021-10-24_13-34-18
  done: false
  episode_len_mean: 409.06849315068496
  episode_media: {}
  episode_reward_max: -3.4999999999999694
  episode_reward_mean: -4.380410958904067
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 3
  episodes_total: 73
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.677878244717916
          entropy_coeff: 0.009999999999999998
          kl: 0.010091685878115283
          policy_loss: 0.04806926581594679
          total_loss: 0.03356491459740533
          vf_explained_var: 0.271481454372406
          vf_loss: 0.01176984731767637
    num_agent_steps_sampled: 30000
    num_agent_steps_trained: 30000
    num_steps_sampled: 30000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,30,759.857,30000,-4.38041,-3.5,-14.57,409.068


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 31000
  custom_metrics: {}
  date: 2021-10-24_13-34-42
  done: false
  episode_len_mean: 408.8
  episode_media: {}
  episode_reward_max: -3.4999999999999694
  episode_reward_mean: -4.369999999999957
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 2
  episodes_total: 75
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.671316901842753
          entropy_coeff: 0.009999999999999998
          kl: 0.0113290529926269
          policy_loss: -0.12211359474394057
          total_loss: -0.1358913911713494
          vf_explained_var: 0.3587067127227783
          vf_loss: 0.012368921453081485
    num_agent_steps_sampled: 31000
    num_agent_steps_trained: 31000
    num_steps_sampled: 31000
    num_steps_trained: 31000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,31,784.21,31000,-4.37,-3.5,-14.57,408.8


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 32000
  custom_metrics: {}
  date: 2021-10-24_13-35-06
  done: false
  episode_len_mean: 407.7564102564103
  episode_media: {}
  episode_reward_max: -3.4999999999999694
  episode_reward_mean: -4.348717948717906
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 3
  episodes_total: 78
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.6616600354512534
          entropy_coeff: 0.009999999999999998
          kl: 0.013075856874647782
          policy_loss: -0.044789241751035054
          total_loss: -0.058720867998070185
          vf_explained_var: 0.15283510088920593
          vf_loss: 0.01203118073948038
    num_agent_steps_sampled: 32000
    num_agent_steps_trained: 32000
    num_steps_sampled: 32000
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,32,808.546,32000,-4.34872,-3.5,-14.57,407.756


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 33000
  custom_metrics: {}
  date: 2021-10-24_13-35-29
  done: false
  episode_len_mean: 406.81481481481484
  episode_media: {}
  episode_reward_max: -3.4999999999999694
  episode_reward_mean: -4.329259259259215
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 3
  episodes_total: 81
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.642324619823032
          entropy_coeff: 0.009999999999999998
          kl: 0.009879068393910017
          policy_loss: 0.018496711055437723
          total_loss: 0.005058300246795018
          vf_explained_var: -0.0008851991733536124
          vf_loss: 0.01249088120012958
    num_agent_steps_sampled: 33000
    num_agent_steps_trained: 33000
    num_steps_sampled: 33000
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,33,831.746,33000,-4.32926,-3.5,-14.57,406.815


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 34000
  custom_metrics: {}
  date: 2021-10-24_13-35-54
  done: false
  episode_len_mean: 405.71084337349396
  episode_media: {}
  episode_reward_max: -3.4899999999999696
  episode_reward_mean: -4.31192771084333
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 2
  episodes_total: 83
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.6442885716756184
          entropy_coeff: 0.009999999999999998
          kl: 0.010795985133611907
          policy_loss: -0.1081277416812049
          total_loss: -0.12115695310963524
          vf_explained_var: 0.3173306882381439
          vf_loss: 0.012873876586349474
    num_agent_steps_sampled: 34000
    num_agent_steps_trained: 34000
    num_steps_sampled: 34000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,34,856.24,34000,-4.31193,-3.49,-14.57,405.711


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 35000
  custom_metrics: {}
  date: 2021-10-24_13-36-18
  done: false
  episode_len_mean: 404.2674418604651
  episode_media: {}
  episode_reward_max: -3.4899999999999696
  episode_reward_mean: -4.288604651162747
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 3
  episodes_total: 86
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.655133851369222
          entropy_coeff: 0.009999999999999998
          kl: 0.011890041425621767
          policy_loss: -0.10405504579345386
          total_loss: -0.11197645515203476
          vf_explained_var: 0.16498592495918274
          vf_loss: 0.01803542369355758
    num_agent_steps_sampled: 35000
    num_agent_steps_trained: 35000
    num_steps_sampled: 35000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,35,880.366,35000,-4.2886,-3.49,-14.57,404.267


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 36000
  custom_metrics: {}
  date: 2021-10-24_13-36-40
  done: false
  episode_len_mean: 403.13483146067415
  episode_media: {}
  episode_reward_max: -3.46999999999997
  episode_reward_mean: -4.268988764044901
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 3
  episodes_total: 89
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.598322113355001
          entropy_coeff: 0.009999999999999998
          kl: 0.011083170390665921
          policy_loss: 0.05041109836763806
          total_loss: 0.035979920128981276
          vf_explained_var: 0.3048502504825592
          vf_loss: 0.01099788560726059
    num_agent_steps_sampled: 36000
    num_agent_steps_trained: 36000
    num_steps_sampled: 36000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,36,902.224,36000,-4.26899,-3.47,-14.57,403.135




Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 37000
  custom_metrics: {}
  date: 2021-10-24_13-37-22
  done: false
  episode_len_mean: 401.6521739130435
  episode_media: {}
  episode_reward_max: -3.289999999999974
  episode_reward_mean: -4.246413043478219
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 3
  episodes_total: 92
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.5759582837422688
          entropy_coeff: 0.009999999999999998
          kl: 0.010282551444605768
          policy_loss: 0.056033299614985786
          total_loss: 0.04016412819425265
          vf_explained_var: 0.09309017658233643
          vf_loss: 0.009376281021589723
    num_agent_steps_sampled: 37000
    num_agent_steps_trained: 37000
    num_steps_sampled: 37000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,37,943.714,37000,-4.24641,-3.29,-14.57,401.652


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 38000
  custom_metrics: {}
  date: 2021-10-24_13-37-45
  done: false
  episode_len_mean: 401.3723404255319
  episode_media: {}
  episode_reward_max: -3.289999999999974
  episode_reward_mean: -4.2387234042552775
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 2
  episodes_total: 94
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.5277925199932523
          entropy_coeff: 0.009999999999999998
          kl: 0.011795405507909489
          policy_loss: -0.09179340286387337
          total_loss: -0.10434600694311989
          vf_explained_var: 0.03218136727809906
          vf_loss: 0.012135552890443553
    num_agent_steps_sampled: 38000
    num_agent_steps_trained: 38000
    num_steps_sampled: 38000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,38,966.891,38000,-4.23872,-3.29,-14.57,401.372


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 39000
  custom_metrics: {}
  date: 2021-10-24_13-38-08
  done: false
  episode_len_mean: 400.63917525773195
  episode_media: {}
  episode_reward_max: -3.289999999999974
  episode_reward_mean: -4.22443298969068
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 3
  episodes_total: 97
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.5245462947421604
          entropy_coeff: 0.009999999999999998
          kl: 0.01018343463067577
          policy_loss: 0.040793982479307385
          total_loss: 0.028919815023740133
          vf_explained_var: 0.11086351424455643
          vf_loss: 0.012862121213563822
    num_agent_steps_sampled: 39000
    num_agent_steps_trained: 39000
    num_steps_sampled: 39000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,39,990.571,39000,-4.22443,-3.29,-14.57,400.639


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2021-10-24_13-38-35
  done: false
  episode_len_mean: 399.11
  episode_media: {}
  episode_reward_max: -3.289999999999974
  episode_reward_mean: -4.202599999999959
  episode_reward_min: -14.569999999999947
  episodes_this_iter: 3
  episodes_total: 100
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.4697987503475614
          entropy_coeff: 0.009999999999999998
          kl: 0.010634254484794405
          policy_loss: 0.08543101615375942
          total_loss: 0.06884671085410649
          vf_explained_var: 0.2760803997516632
          vf_loss: 0.007581968137916798
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_steps_sampled: 40000
    num_steps_trained: 40000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,40,1017.52,40000,-4.2026,-3.29,-14.57,399.11


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 41000
  custom_metrics: {}
  date: 2021-10-24_13-39-03
  done: false
  episode_len_mean: 397.27
  episode_media: {}
  episode_reward_max: -3.289999999999974
  episode_reward_mean: -4.078099999999958
  episode_reward_min: -11.029999999999927
  episodes_this_iter: 3
  episodes_total: 103
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3897101958592732
          entropy_coeff: 0.009999999999999998
          kl: 0.01329011389265911
          policy_loss: 0.02781195549501313
          total_loss: 0.01568396844797664
          vf_explained_var: 0.13525663316249847
          vf_loss: 0.011104609398171306
    num_agent_steps_sampled: 41000
    num_agent_steps_trained: 41000
    num_steps_sampled: 41000
    num_steps_trained: 41000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,41,1044.67,41000,-4.0781,-3.29,-11.03,397.27


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 42000
  custom_metrics: {}
  date: 2021-10-24_13-39-31
  done: false
  episode_len_mean: 394.73
  episode_media: {}
  episode_reward_max: -3.1999999999999758
  episode_reward_mean: -4.01509999999996
  episode_reward_min: -11.029999999999927
  episodes_this_iter: 3
  episodes_total: 106
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.341931695408291
          entropy_coeff: 0.009999999999999998
          kl: 0.012903636297818603
          policy_loss: 0.034918449405166835
          total_loss: 0.024850120892127354
          vf_explained_var: 0.14313368499279022
          vf_loss: 0.012705801850340018
    num_agent_steps_sampled: 42000
    num_agent_steps_trained: 42000
    num_steps_sampled: 42000
    num_steps_trained: 42000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,42,1073.44,42000,-4.0151,-3.2,-11.03,394.73


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 43000
  custom_metrics: {}
  date: 2021-10-24_13-40-01
  done: false
  episode_len_mean: 391.45
  episode_media: {}
  episode_reward_max: -3.1099999999999777
  episode_reward_mean: -3.98229999999996
  episode_reward_min: -11.029999999999927
  episodes_this_iter: 3
  episodes_total: 109
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.267160619629754
          entropy_coeff: 0.009999999999999998
          kl: 0.010114387249010247
          policy_loss: 0.03767357601059808
          total_loss: 0.02765291018618478
          vf_explained_var: 0.1800941824913025
          vf_loss: 0.01214522431190643
    num_agent_steps_sampled: 43000
    num_agent_steps_trained: 43000
    num_steps_sampled: 43000
    num_steps_trained: 43000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,43,1102.69,43000,-3.9823,-3.11,-11.03,391.45


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 44000
  custom_metrics: {}
  date: 2021-10-24_13-40-31
  done: false
  episode_len_mean: 388.73
  episode_media: {}
  episode_reward_max: -3.039999999999979
  episode_reward_mean: -3.8872999999999602
  episode_reward_min: -4.9199999999999395
  episodes_this_iter: 3
  episodes_total: 112
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1570054213205974
          entropy_coeff: 0.009999999999999998
          kl: 0.0143216754685392
          policy_loss: 0.019795795612865024
          total_loss: 0.010805064108636644
          vf_explained_var: 0.2280743271112442
          vf_loss: 0.011863236289031596
    num_agent_steps_sampled: 44000
    num_agent_steps_trained: 44000
    num_steps_sampled: 44000
    num_steps_trained: 44000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,44,1133.42,44000,-3.8873,-3.04,-4.92,388.73


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 45000
  custom_metrics: {}
  date: 2021-10-24_13-41-03
  done: false
  episode_len_mean: 385.39
  episode_media: {}
  episode_reward_max: -2.809999999999984
  episode_reward_mean: -3.8538999999999617
  episode_reward_min: -4.9199999999999395
  episodes_this_iter: 3
  episodes_total: 115
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.025714679559072
          entropy_coeff: 0.009999999999999998
          kl: 0.01419542117871515
          policy_loss: -0.10507083924280272
          total_loss: -0.10985836444629563
          vf_explained_var: 0.21830704808235168
          vf_loss: 0.014759851743777593
    num_agent_steps_sampled: 45000
    num_agent_steps_trained: 45000
    num_steps_sampled: 45000
    num_steps_trained: 45000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,45,1165.23,45000,-3.8539,-2.81,-4.92,385.39


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 46000
  custom_metrics: {}
  date: 2021-10-24_13-41-33
  done: false
  episode_len_mean: 381.07
  episode_media: {}
  episode_reward_max: -2.809999999999984
  episode_reward_mean: -3.810699999999962
  episode_reward_min: -4.9199999999999395
  episodes_this_iter: 4
  episodes_total: 119
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9205636395348442
          entropy_coeff: 0.009999999999999998
          kl: 0.009890947444665857
          policy_loss: 0.046419633759392634
          total_loss: 0.04011409249570635
          vf_explained_var: 0.2687522768974304
          vf_loss: 0.01240554435385598
    num_agent_steps_sampled: 46000
    num_agent_steps_trained: 46000
    num_steps_sampled: 46000
    num_steps_trained: 46000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,46,1194.51,46000,-3.8107,-2.81,-4.92,381.07




Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 47000
  custom_metrics: {}
  date: 2021-10-24_13-42-20
  done: false
  episode_len_mean: 374.42
  episode_media: {}
  episode_reward_max: -2.53999999999999
  episode_reward_mean: -3.7441999999999624
  episode_reward_min: -4.9199999999999395
  episodes_this_iter: 4
  episodes_total: 123
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9120742903815375
          entropy_coeff: 0.009999999999999998
          kl: 0.007035836148026044
          policy_loss: -0.013683400427301725
          total_loss: -0.017488345669375526
          vf_explained_var: 0.20107559859752655
          vf_loss: 0.014964004585312472
    num_agent_steps_sampled: 47000
    num_agent_steps_trained: 47000
    num_steps_sampled: 47000
    num_steps_trained: 47000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,47,1242.18,47000,-3.7442,-2.54,-4.92,374.42


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 48000
  custom_metrics: {}
  date: 2021-10-24_13-42-51
  done: false
  episode_len_mean: 369.22
  episode_media: {}
  episode_reward_max: -2.53999999999999
  episode_reward_mean: -3.692199999999965
  episode_reward_min: -4.9199999999999395
  episodes_this_iter: 3
  episodes_total: 126
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9249416377809312
          entropy_coeff: 0.009999999999999998
          kl: 0.006246397055851569
          policy_loss: -0.013395390080081091
          total_loss: -0.02182648041182094
          vf_explained_var: 0.19258905947208405
          vf_loss: 0.010506003986423214
    num_agent_steps_sampled: 48000
    num_agent_steps_trained: 48000
    num_steps_sampled: 48000
    num_steps_trained: 48000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,48,1272.67,48000,-3.6922,-2.54,-4.92,369.22


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 49000
  custom_metrics: {}
  date: 2021-10-24_13-43-21
  done: false
  episode_len_mean: 362.65
  episode_media: {}
  episode_reward_max: -2.53999999999999
  episode_reward_mean: -3.6264999999999663
  episode_reward_min: -4.9199999999999395
  episodes_this_iter: 4
  episodes_total: 130
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.8681173165639242
          entropy_coeff: 0.009999999999999998
          kl: 0.006465584285203245
          policy_loss: 0.010043538361787795
          total_loss: 0.00621196652452151
          vf_explained_var: 0.15400351583957672
          vf_loss: 0.014526323529167308
    num_agent_steps_sampled: 49000
    num_agent_steps_trained: 49000
    num_steps_sampled: 49000
    num_steps_trained: 49000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,49,1303.26,49000,-3.6265,-2.54,-4.92,362.65


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 50000
  custom_metrics: {}
  date: 2021-10-24_13-43-52
  done: false
  episode_len_mean: 356.66
  episode_media: {}
  episode_reward_max: -2.53999999999999
  episode_reward_mean: -3.566599999999968
  episode_reward_min: -4.9199999999999395
  episodes_this_iter: 3
  episodes_total: 133
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9435883495542738
          entropy_coeff: 0.009999999999999998
          kl: 0.008688841623871232
          policy_loss: -0.08754099541240268
          total_loss: -0.09329243534141117
          vf_explained_var: 0.11795086413621902
          vf_loss: 0.013250001147389412
    num_agent_steps_sampled: 50000
    num_agent_steps_trained: 50000
    num_steps_sampled: 50000
    num_steps_trained: 50000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,50,1333.43,50000,-3.5666,-2.54,-4.92,356.66


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 51000
  custom_metrics: {}
  date: 2021-10-24_13-44-21
  done: false
  episode_len_mean: 350.08
  episode_media: {}
  episode_reward_max: -2.53999999999999
  episode_reward_mean: -3.5007999999999693
  episode_reward_min: -4.41999999999995
  episodes_this_iter: 4
  episodes_total: 137
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9121494478649563
          entropy_coeff: 0.009999999999999998
          kl: 0.007818946592156649
          policy_loss: 0.024478316555420557
          total_loss: 0.02117800298664305
          vf_explained_var: 0.12077346444129944
          vf_loss: 0.015430233751734098
    num_agent_steps_sampled: 51000
    num_agent_steps_trained: 51000
    num_steps_sampled: 51000
    num_steps_trained: 51000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,51,1362.96,51000,-3.5008,-2.54,-4.42,350.08


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 52000
  custom_metrics: {}
  date: 2021-10-24_13-44-52
  done: false
  episode_len_mean: 345.85
  episode_media: {}
  episode_reward_max: -2.53999999999999
  episode_reward_mean: -3.4584999999999697
  episode_reward_min: -4.41999999999995
  episodes_this_iter: 3
  episodes_total: 140
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9005720999505784
          entropy_coeff: 0.009999999999999998
          kl: 0.007081227551718901
          policy_loss: -0.11707980889413092
          total_loss: -0.11758271753787994
          vf_explained_var: 0.10957472771406174
          vf_loss: 0.018148752922813097
    num_agent_steps_sampled: 52000
    num_agent_steps_trained: 52000
    num_steps_sampled: 52000
    num_steps_trained: 52000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,52,1393.59,52000,-3.4585,-2.54,-4.42,345.85


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 53000
  custom_metrics: {}
  date: 2021-10-24_13-45-22
  done: false
  episode_len_mean: 340.78
  episode_media: {}
  episode_reward_max: -2.53999999999999
  episode_reward_mean: -3.4077999999999706
  episode_reward_min: -4.41999999999995
  episodes_this_iter: 4
  episodes_total: 144
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.824673843383789
          entropy_coeff: 0.009999999999999998
          kl: 0.006130309365384079
          policy_loss: 0.01114232631193267
          total_loss: 0.00977174590031306
          vf_explained_var: 0.19500486552715302
          vf_loss: 0.016569636751794153
    num_agent_steps_sampled: 53000
    num_agent_steps_trained: 53000
    num_steps_sampled: 53000
    num_steps_trained: 53000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,53,1423.81,53000,-3.4078,-2.54,-4.42,340.78


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 54000
  custom_metrics: {}
  date: 2021-10-24_13-45-54
  done: false
  episode_len_mean: 336.8
  episode_media: {}
  episode_reward_max: -2.53999999999999
  episode_reward_mean: -3.3679999999999723
  episode_reward_min: -4.41999999999995
  episodes_this_iter: 4
  episodes_total: 148
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.8199499289194743
          entropy_coeff: 0.009999999999999998
          kl: 0.008391462535709238
          policy_loss: -0.006122508893410364
          total_loss: -0.005747855661643876
          vf_explained_var: 0.17876967787742615
          vf_loss: 0.018154582981434134
    num_agent_steps_sampled: 54000
    num_agent_steps_trained: 54000
    num_steps_sampled: 54000
    num_steps_trained: 54000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,54,1455.8,54000,-3.368,-2.54,-4.42,336.8




Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 55000
  custom_metrics: {}
  date: 2021-10-24_13-46-43
  done: false
  episode_len_mean: 331.74
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -3.3173999999999726
  episode_reward_min: -4.41999999999995
  episodes_this_iter: 4
  episodes_total: 152
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.8175450947549607
          entropy_coeff: 0.009999999999999998
          kl: 0.007575535715508231
          policy_loss: 0.005364245590236452
          total_loss: 0.0034049292819367514
          vf_explained_var: 0.2224804162979126
          vf_loss: 0.015837357565760612
    num_agent_steps_sampled: 55000
    num_agent_steps_trained: 55000
    num_steps_sampled: 55000
    num_steps_trained: 55000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,55,1505.21,55000,-3.3174,-2.5,-4.42,331.74


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 56000
  custom_metrics: {}
  date: 2021-10-24_13-47-16
  done: false
  episode_len_mean: 327.94
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -3.2793999999999737
  episode_reward_min: -4.41999999999995
  episodes_this_iter: 3
  episodes_total: 155
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.867372957865397
          entropy_coeff: 0.009999999999999998
          kl: 0.0077518738533963
          policy_loss: -0.047169538877076575
          total_loss: -0.05249385672310988
          vf_explained_var: 0.20508338510990143
          vf_loss: 0.012961817025724385
    num_agent_steps_sampled: 56000
    num_agent_steps_trained: 56000
    num_steps_sampled: 56000
    num_steps_trained: 56000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,56,1537.2,56000,-3.2794,-2.5,-4.42,327.94


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 57000
  custom_metrics: {}
  date: 2021-10-24_13-47-45
  done: false
  episode_len_mean: 322.03
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -3.2202999999999746
  episode_reward_min: -4.159999999999956
  episodes_this_iter: 4
  episodes_total: 159
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.808551503552331
          entropy_coeff: 0.009999999999999998
          kl: 0.0076771912231715965
          policy_loss: -0.008203245202700297
          total_loss: -0.011554401616255443
          vf_explained_var: 0.2818618416786194
          vf_loss: 0.014350496377382014
    num_agent_steps_sampled: 57000
    num_agent_steps_trained: 57000
    num_steps_sampled: 57000
    num_steps_trained: 57000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,57,1566.83,57000,-3.2203,-2.5,-4.16,322.03


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 58000
  custom_metrics: {}
  date: 2021-10-24_13-48-16
  done: false
  episode_len_mean: 317.57
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -3.1756999999999764
  episode_reward_min: -4.159999999999956
  episodes_this_iter: 4
  episodes_total: 163
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9112569040722318
          entropy_coeff: 0.009999999999999998
          kl: 0.008444485181352155
          policy_loss: -0.002540884498092863
          total_loss: -0.003869716243611442
          vf_explained_var: 0.120509572327137
          vf_loss: 0.01736151058640745
    num_agent_steps_sampled: 58000
    num_agent_steps_trained: 58000
    num_steps_sampled: 58000
    num_steps_trained: 58000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,58,1597.87,58000,-3.1757,-2.5,-4.16,317.57


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 59000
  custom_metrics: {}
  date: 2021-10-24_13-48-48
  done: false
  episode_len_mean: 314.95
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -3.149499999999977
  episode_reward_min: -4.159999999999956
  episodes_this_iter: 3
  episodes_total: 166
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.8792390373018053
          entropy_coeff: 0.009999999999999998
          kl: 0.007205016840471896
          policy_loss: -0.09949131136139235
          total_loss: -0.10429538703627056
          vf_explained_var: 0.18535976111888885
          vf_loss: 0.013628059056484038
    num_agent_steps_sampled: 59000
    num_agent_steps_trained: 59000
    num_steps_sampled: 59000
    num_steps_trained: 59000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,59,1630.12,59000,-3.1495,-2.5,-4.16,314.95


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2021-10-24_13-49-20
  done: false
  episode_len_mean: 311.45
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -3.114499999999977
  episode_reward_min: -4.159999999999956
  episodes_this_iter: 4
  episodes_total: 170
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.855470531516605
          entropy_coeff: 0.009999999999999998
          kl: 0.006752913134271113
          policy_loss: 0.030178257491853502
          total_loss: 0.025029530914293396
          vf_explained_var: 0.26784804463386536
          vf_loss: 0.013068334396100707
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_steps_sampled: 60000
    num_steps_trained: 60000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,60,1662.03,60000,-3.1145,-2.5,-4.16,311.45


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 61000
  custom_metrics: {}
  date: 2021-10-24_13-49-54
  done: false
  episode_len_mean: 306.72
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -3.067199999999979
  episode_reward_min: -4.159999999999956
  episodes_this_iter: 4
  episodes_total: 174
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.8147993723551432
          entropy_coeff: 0.009999999999999998
          kl: 0.00948778133449903
          policy_loss: 0.012578862574365404
          total_loss: 0.009015148257215817
          vf_explained_var: 0.21534617245197296
          vf_loss: 0.014109890845914682
    num_agent_steps_sampled: 61000
    num_agent_steps_trained: 61000
    num_steps_sampled: 61000
    num_steps_trained: 61000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,61,1695.24,61000,-3.0672,-2.5,-4.16,306.72


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 62000
  custom_metrics: {}
  date: 2021-10-24_13-50-26
  done: false
  episode_len_mean: 301.8
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -3.017999999999979
  episode_reward_min: -4.049999999999958
  episodes_this_iter: 4
  episodes_total: 178
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7776668614811368
          entropy_coeff: 0.009999999999999998
          kl: 0.014041781665708047
          policy_loss: -0.01745737368861834
          total_loss: -0.019657660606834625
          vf_explained_var: 0.15750481188297272
          vf_loss: 0.014874292403045627
    num_agent_steps_sampled: 62000
    num_agent_steps_trained: 62000
    num_steps_sampled: 62000
    num_steps_trained: 62000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,62,1727.76,62000,-3.018,-2.5,-4.05,301.8




Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 63000
  custom_metrics: {}
  date: 2021-10-24_13-51-16
  done: false
  episode_len_mean: 298.07
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -2.9806999999999806
  episode_reward_min: -4.049999999999958
  episodes_this_iter: 3
  episodes_total: 181
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.662956209977468
          entropy_coeff: 0.009999999999999998
          kl: 0.008347620993515333
          policy_loss: -0.10997213878565365
          total_loss: -0.11157005147801505
          vf_explained_var: 0.16436432301998138
          vf_loss: 0.014614268795897563
    num_agent_steps_sampled: 63000
    num_agent_steps_trained: 63000
    num_steps_sampled: 63000
    num_steps_trained: 63000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,63,1777.12,63000,-2.9807,-2.36,-4.05,298.07


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 64000
  custom_metrics: {}
  date: 2021-10-24_13-51-47
  done: false
  episode_len_mean: 294.45
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -2.9444999999999806
  episode_reward_min: -4.049999999999958
  episodes_this_iter: 4
  episodes_total: 185
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.6441487630208333
          entropy_coeff: 0.009999999999999998
          kl: 0.006657905462346747
          policy_loss: -0.009086063007513683
          total_loss: -0.01085330926709705
          vf_explained_var: 0.09369421005249023
          vf_loss: 0.014341347198933364
    num_agent_steps_sampled: 64000
    num_agent_steps_trained: 64000
    num_steps_sampled: 64000
    num_steps_trained: 64000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,64,1808.2,64000,-2.9445,-2.36,-4.05,294.45


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 65000
  custom_metrics: {}
  date: 2021-10-24_13-52-19
  done: false
  episode_len_mean: 289.79
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -2.897899999999982
  episode_reward_min: -4.049999999999958
  episodes_this_iter: 4
  episodes_total: 189
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.6169296860694886
          entropy_coeff: 0.009999999999999998
          kl: 0.006055288375641194
          policy_loss: 0.014575867851575216
          total_loss: 0.011750376390086279
          vf_explained_var: 0.06582662463188171
          vf_loss: 0.013041041356821855
    num_agent_steps_sampled: 65000
    num_agent_steps_trained: 65000
    num_steps_sampled: 65000
    num_steps_trained: 65000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,65,1840.32,65000,-2.8979,-2.36,-4.05,289.79


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 66000
  custom_metrics: {}
  date: 2021-10-24_13-52-49
  done: false
  episode_len_mean: 285.68
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -2.8567999999999825
  episode_reward_min: -4.049999999999958
  episodes_this_iter: 4
  episodes_total: 193
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5212063683403863
          entropy_coeff: 0.009999999999999998
          kl: 0.006432129509238078
          policy_loss: 0.01163641901479827
          total_loss: 0.00984994661476877
          vf_explained_var: 0.09864071011543274
          vf_loss: 0.013103986303839419
    num_agent_steps_sampled: 66000
    num_agent_steps_trained: 66000
    num_steps_sampled: 66000
    num_steps_trained: 66000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,66,1870.6,66000,-2.8568,-2.36,-4.05,285.68


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 67000
  custom_metrics: {}
  date: 2021-10-24_13-53-21
  done: false
  episode_len_mean: 280.31
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -2.8030999999999846
  episode_reward_min: -3.5499999999999683
  episodes_this_iter: 4
  episodes_total: 197
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5092832605044046
          entropy_coeff: 0.009999999999999998
          kl: 0.0068425690639251965
          policy_loss: 0.006868941419654422
          total_loss: 0.006154790230923229
          vf_explained_var: 0.06040404736995697
          vf_loss: 0.01403655292880204
    num_agent_steps_sampled: 67000
    num_agent_steps_trained: 67000
    num_steps_sampled: 67000
    num_steps_trained: 67000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,67,1902.7,67000,-2.8031,-2.36,-3.55,280.31


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 68000
  custom_metrics: {}
  date: 2021-10-24_13-53-56
  done: false
  episode_len_mean: 276.24
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -2.7623999999999853
  episode_reward_min: -3.47999999999997
  episodes_this_iter: 4
  episodes_total: 201
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5647515773773193
          entropy_coeff: 0.009999999999999998
          kl: 0.007285638196774357
          policy_loss: 0.017278763982984755
          total_loss: 0.015546701062056753
          vf_explained_var: 0.1080409437417984
          vf_loss: 0.013551171734515163
    num_agent_steps_sampled: 68000
    num_agent_steps_trained: 68000
    num_steps_sampled: 68000
    num_steps_trained: 68000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,68,1937.56,68000,-2.7624,-2.36,-3.48,276.24


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 69000
  custom_metrics: {}
  date: 2021-10-24_13-54-30
  done: false
  episode_len_mean: 272.74
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -2.7273999999999865
  episode_reward_min: -3.229999999999975
  episodes_this_iter: 4
  episodes_total: 205
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5491428467962476
          entropy_coeff: 0.009999999999999998
          kl: 0.006484706241645534
          policy_loss: 0.01639820080664423
          total_loss: 0.015379443267981211
          vf_explained_var: 0.0523436963558197
          vf_loss: 0.014148435576094522
    num_agent_steps_sampled: 69000
    num_agent_steps_trained: 69000
    num_steps_sampled: 69000
    num_steps_trained: 69000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,69,1971.64,69000,-2.7274,-2.36,-3.23,272.74


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 70000
  custom_metrics: {}
  date: 2021-10-24_13-55-03
  done: false
  episode_len_mean: 270.25
  episode_media: {}
  episode_reward_max: -2.3599999999999937
  episode_reward_mean: -2.702499999999985
  episode_reward_min: -3.2099999999999755
  episodes_this_iter: 4
  episodes_total: 209
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7117195818159314
          entropy_coeff: 0.009999999999999998
          kl: 0.013230702139081372
          policy_loss: 0.004006277355882857
          total_loss: 0.0029204951806200874
          vf_explained_var: 0.02281450666487217
          vf_loss: 0.015369876939803361
    num_agent_steps_sampled: 70000
    num_agent_steps_trained: 70000
    num_steps_sampled: 70000
    num_steps_trained: 70000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,70,2003.94,70000,-2.7025,-2.36,-3.21,270.25




Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 71000
  custom_metrics: {}
  date: 2021-10-24_13-55-52
  done: false
  episode_len_mean: 267.9
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.678999999999986
  episode_reward_min: -3.1699999999999764
  episodes_this_iter: 4
  episodes_total: 213
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7139503399531046
          entropy_coeff: 0.009999999999999998
          kl: 0.008260322019374182
          policy_loss: 0.035200661586390604
          total_loss: 0.03212252747681406
          vf_explained_var: 0.07477491348981857
          vf_loss: 0.013648352130419678
    num_agent_steps_sampled: 71000
    num_agent_steps_trained: 71000
    num_steps_sampled: 71000
    num_steps_trained: 71000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,71,2053.8,71000,-2.679,-2.28,-3.17,267.9


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 72000
  custom_metrics: {}
  date: 2021-10-24_13-56-25
  done: false
  episode_len_mean: 266.44
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.6643999999999877
  episode_reward_min: -3.049999999999979
  episodes_this_iter: 4
  episodes_total: 217
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5592495878537496
          entropy_coeff: 0.009999999999999998
          kl: 0.008669924377312555
          policy_loss: 0.02498373645875189
          total_loss: 0.020599088817834853
          vf_explained_var: 0.0848105326294899
          vf_loss: 0.010774353873502049
    num_agent_steps_sampled: 72000
    num_agent_steps_trained: 72000
    num_steps_sampled: 72000
    num_steps_trained: 72000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,72,2085.84,72000,-2.6644,-2.28,-3.05,266.44


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 73000
  custom_metrics: {}
  date: 2021-10-24_13-56-58
  done: false
  episode_len_mean: 265.68
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.6567999999999863
  episode_reward_min: -2.9199999999999817
  episodes_this_iter: 4
  episodes_total: 221
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.6019215914938185
          entropy_coeff: 0.009999999999999998
          kl: 0.014771231813939936
          policy_loss: -0.01717242201169332
          total_loss: -0.016350777116086748
          vf_explained_var: 0.04576577618718147
          vf_loss: 0.016102296403712697
    num_agent_steps_sampled: 73000
    num_agent_steps_trained: 73000
    num_steps_sampled: 73000
    num_steps_trained: 73000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,73,2119.76,73000,-2.6568,-2.28,-2.92,265.68


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 74000
  custom_metrics: {}
  date: 2021-10-24_13-57-31
  done: false
  episode_len_mean: 264.46
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.644599999999987
  episode_reward_min: -2.9199999999999817
  episodes_this_iter: 4
  episodes_total: 225
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.4391031132804022
          entropy_coeff: 0.009999999999999998
          kl: 0.005679183074808552
          policy_loss: 0.006883026079999076
          total_loss: 0.007012276848157247
          vf_explained_var: 0.18955251574516296
          vf_loss: 0.014236320782866742
    num_agent_steps_sampled: 74000
    num_agent_steps_trained: 74000
    num_steps_sampled: 74000
    num_steps_trained: 7400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,74,2152.69,74000,-2.6446,-2.28,-2.92,264.46


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 75000
  custom_metrics: {}
  date: 2021-10-24_13-58-03
  done: false
  episode_len_mean: 262.92
  episode_media: {}
  episode_reward_max: -2.249999999999996
  episode_reward_mean: -2.6291999999999884
  episode_reward_min: -2.9199999999999817
  episodes_this_iter: 4
  episodes_total: 229
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.3156792004903157
          entropy_coeff: 0.009999999999999998
          kl: 0.007611965527962323
          policy_loss: 0.006761506034268274
          total_loss: 0.007925645013650258
          vf_explained_var: 0.1466636210680008
          vf_loss: 0.013940328794221083
    num_agent_steps_sampled: 75000
    num_agent_steps_trained: 75000
    num_steps_sampled: 75000
    num_steps_trained: 75000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,75,2184.18,75000,-2.6292,-2.25,-2.92,262.92


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 76000
  custom_metrics: {}
  date: 2021-10-24_13-58-36
  done: false
  episode_len_mean: 261.55
  episode_media: {}
  episode_reward_max: -2.249999999999996
  episode_reward_mean: -2.6154999999999875
  episode_reward_min: -2.9199999999999817
  episodes_this_iter: 4
  episodes_total: 233
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.3086388972070482
          entropy_coeff: 0.009999999999999998
          kl: 0.005593588202193919
          policy_loss: 0.021053226623270246
          total_loss: 0.022666201574934854
          vf_explained_var: 0.12492784857749939
          vf_loss: 0.014419685107552344
    num_agent_steps_sampled: 76000
    num_agent_steps_trained: 76000
    num_steps_sampled: 76000
    num_steps_trained: 7600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,76,2217.61,76000,-2.6155,-2.25,-2.92,261.55


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 77000
  custom_metrics: {}
  date: 2021-10-24_13-59-10
  done: false
  episode_len_mean: 260.09
  episode_media: {}
  episode_reward_max: -2.249999999999996
  episode_reward_mean: -2.600899999999988
  episode_reward_min: -2.9199999999999817
  episodes_this_iter: 4
  episodes_total: 237
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.3708100928200615
          entropy_coeff: 0.009999999999999998
          kl: 0.004303925289010143
          policy_loss: 0.014611925764216317
          total_loss: 0.01511111284295718
          vf_explained_var: 0.12921778857707977
          vf_loss: 0.013992092758417129
    num_agent_steps_sampled: 77000
    num_agent_steps_trained: 77000
    num_steps_sampled: 77000
    num_steps_trained: 77000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,77,2250.73,77000,-2.6009,-2.25,-2.92,260.09




Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 78000
  custom_metrics: {}
  date: 2021-10-24_14-00-02
  done: false
  episode_len_mean: 258.68
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.5867999999999887
  episode_reward_min: -2.9199999999999817
  episodes_this_iter: 4
  episodes_total: 241
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.024999999999999994
          cur_lr: 5.000000000000001e-05
          entropy: 1.2725055509143406
          entropy_coeff: 0.009999999999999998
          kl: 0.006586731271201214
          policy_loss: 0.016471090912818908
          total_loss: 0.018288878599802653
          vf_explained_var: 0.09500110149383545
          vf_loss: 0.014378175894833273
    num_agent_steps_sampled: 78000
    num_agent_steps_trained: 78000
    num_steps_sampled: 78000
    num_steps_trained: 78

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,78,2302.83,78000,-2.5868,-2.19,-2.92,258.68


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 79000
  custom_metrics: {}
  date: 2021-10-24_14-00-37
  done: false
  episode_len_mean: 257.46
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.574599999999989
  episode_reward_min: -2.9199999999999817
  episodes_this_iter: 4
  episodes_total: 245
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.024999999999999994
          cur_lr: 5.000000000000001e-05
          entropy: 1.2972793738047281
          entropy_coeff: 0.009999999999999998
          kl: 0.00557703354378659
          policy_loss: 0.018231557640764447
          total_loss: 0.019380651497178606
          vf_explained_var: 0.18454040586948395
          vf_loss: 0.013982462510466576
    num_agent_steps_sampled: 79000
    num_agent_steps_trained: 79000
    num_steps_sampled: 79000
    num_steps_trained: 7900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,79,2338.08,79000,-2.5746,-2.19,-2.92,257.46


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 80000
  custom_metrics: {}
  date: 2021-10-24_14-01-12
  done: false
  episode_len_mean: 255.93
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.5592999999999893
  episode_reward_min: -2.9199999999999817
  episodes_this_iter: 4
  episodes_total: 249
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.024999999999999994
          cur_lr: 5.000000000000001e-05
          entropy: 1.2028002169397143
          entropy_coeff: 0.009999999999999998
          kl: 0.003743473168825062
          policy_loss: -0.041593517280287214
          total_loss: -0.039764325155152214
          vf_explained_var: 0.11949621886014938
          vf_loss: 0.013763611091093884
    num_agent_steps_sampled: 80000
    num_agent_steps_trained: 80000
    num_steps_sampled: 80000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,80,2373.35,80000,-2.5593,-2.19,-2.92,255.93


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 81000
  custom_metrics: {}
  date: 2021-10-24_14-01-47
  done: false
  episode_len_mean: 254.77
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.547699999999989
  episode_reward_min: -2.9199999999999817
  episodes_this_iter: 5
  episodes_total: 254
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012499999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.194903349876404
          entropy_coeff: 0.009999999999999998
          kl: 0.008132817542400122
          policy_loss: -0.02941139779157109
          total_loss: -0.02334882699780994
          vf_explained_var: 0.12114426493644714
          vf_loss: 0.017909942184471422
    num_agent_steps_sampled: 81000
    num_agent_steps_trained: 81000
    num_steps_sampled: 81000
    num_steps_trained: 8100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,81,2408.47,81000,-2.5477,-2.19,-2.92,254.77


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 82000
  custom_metrics: {}
  date: 2021-10-24_14-02-22
  done: false
  episode_len_mean: 253.7
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.5369999999999893
  episode_reward_min: -2.9199999999999817
  episodes_this_iter: 4
  episodes_total: 258
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012499999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.109177621205648
          entropy_coeff: 0.009999999999999998
          kl: 0.011064990746449944
          policy_loss: 0.0019756917738252217
          total_loss: 0.004783243354823854
          vf_explained_var: 0.18018405139446259
          vf_loss: 0.013761016353964806
    num_agent_steps_sampled: 82000
    num_agent_steps_trained: 82000
    num_steps_sampled: 82000
    num_steps_trained: 820

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,82,2443.49,82000,-2.537,-2.19,-2.92,253.7


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 83000
  custom_metrics: {}
  date: 2021-10-24_14-02-58
  done: false
  episode_len_mean: 252.13
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.52129999999999
  episode_reward_min: -2.9199999999999817
  episodes_this_iter: 4
  episodes_total: 262
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012499999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.0152728080749511
          entropy_coeff: 0.009999999999999998
          kl: 0.00854548165631807
          policy_loss: 0.043381691889630425
          total_loss: 0.043492080519596736
          vf_explained_var: 0.16184931993484497
          vf_loss: 0.010156299489446812
    num_agent_steps_sampled: 83000
    num_agent_steps_trained: 83000
    num_steps_sampled: 83000
    num_steps_trained: 83000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,83,2479.03,83000,-2.5213,-2.19,-2.92,252.13


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 84000
  custom_metrics: {}
  date: 2021-10-24_14-03-31
  done: false
  episode_len_mean: 250.29
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.50289999999999
  episode_reward_min: -2.909999999999982
  episodes_this_iter: 4
  episodes_total: 266
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012499999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 0.9396879984272851
          entropy_coeff: 0.009999999999999998
          kl: 0.005177083332431249
          policy_loss: -0.03103602727254232
          total_loss: -0.02655711786614524
          vf_explained_var: 0.17864227294921875
          vf_loss: 0.013811076608382994
    num_agent_steps_sampled: 84000
    num_agent_steps_trained: 84000
    num_steps_sampled: 84000
    num_steps_trained: 84000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,84,2512.01,84000,-2.5029,-2.19,-2.91,250.29




Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 85000
  custom_metrics: {}
  date: 2021-10-24_14-04-24
  done: false
  episode_len_mean: 247.83
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.478299999999991
  episode_reward_min: -2.8299999999999836
  episodes_this_iter: 5
  episodes_total: 271
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012499999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 0.9374240868621402
          entropy_coeff: 0.009999999999999998
          kl: 0.004984585201702569
          policy_loss: -0.012237703551848729
          total_loss: -0.0044098546935452355
          vf_explained_var: 0.14866098761558533
          vf_loss: 0.017139784753736523
    num_agent_steps_sampled: 85000
    num_agent_steps_trained: 85000
    num_steps_sampled: 85000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,85,2565.23,85000,-2.4783,-2.03,-2.83,247.83


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 86000
  custom_metrics: {}
  date: 2021-10-24_14-04-59
  done: false
  episode_len_mean: 246.73
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.4672999999999914
  episode_reward_min: -2.8299999999999836
  episodes_this_iter: 4
  episodes_total: 275
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006249999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 0.8806563516457876
          entropy_coeff: 0.009999999999999998
          kl: 0.004749406619489744
          policy_loss: 0.038010713458061215
          total_loss: 0.04260693167646726
          vf_explained_var: 0.1140073761343956
          vf_loss: 0.013373098149895669
    num_agent_steps_sampled: 86000
    num_agent_steps_trained: 86000
    num_steps_sampled: 86000
    num_steps_trained: 8600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,86,2599.85,86000,-2.4673,-2.03,-2.83,246.73


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 87000
  custom_metrics: {}
  date: 2021-10-24_14-05-34
  done: false
  episode_len_mean: 245.28
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.4527999999999914
  episode_reward_min: -2.759999999999985
  episodes_this_iter: 4
  episodes_total: 279
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0031249999999999993
          cur_lr: 5.000000000000001e-05
          entropy: 0.8528572565979428
          entropy_coeff: 0.009999999999999998
          kl: 0.004191921375099778
          policy_loss: -0.04614872758587201
          total_loss: -0.03935868905650245
          vf_explained_var: 0.023441558703780174
          vf_loss: 0.015305509945998589
    num_agent_steps_sampled: 87000
    num_agent_steps_trained: 87000
    num_steps_sampled: 87000
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,87,2635.09,87000,-2.4528,-2.03,-2.76,245.28


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 88000
  custom_metrics: {}
  date: 2021-10-24_14-06-09
  done: false
  episode_len_mean: 243.85
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.438499999999992
  episode_reward_min: -2.759999999999985
  episodes_this_iter: 5
  episodes_total: 284
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015624999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.0001492785082924
          entropy_coeff: 0.009999999999999998
          kl: 0.003887963692452148
          policy_loss: -0.001894808808962504
          total_loss: 0.0035319018695089553
          vf_explained_var: 0.21359889209270477
          vf_loss: 0.015422126578374041
    num_agent_steps_sampled: 88000
    num_agent_steps_trained: 88000
    num_steps_sampled: 88000
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,88,2669.6,88000,-2.4385,-2.03,-2.76,243.85


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 89000
  custom_metrics: {}
  date: 2021-10-24_14-06-43
  done: false
  episode_len_mean: 242.71
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.427099999999992
  episode_reward_min: -2.759999999999985
  episodes_this_iter: 4
  episodes_total: 288
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007812499999999998
          cur_lr: 5.000000000000001e-05
          entropy: 0.8071586377090878
          entropy_coeff: 0.009999999999999998
          kl: 0.007467644785560168
          policy_loss: 0.015973518209324942
          total_loss: 0.021207677904102537
          vf_explained_var: 0.10487408936023712
          vf_loss: 0.013299916001657645
    num_agent_steps_sampled: 89000
    num_agent_steps_trained: 89000
    num_steps_sampled: 89000
    num_steps_trained: 890

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,89,2704.26,89000,-2.4271,-2.03,-2.76,242.71


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 90000
  custom_metrics: {}
  date: 2021-10-24_14-07-19
  done: false
  episode_len_mean: 241.73
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.417299999999992
  episode_reward_min: -2.759999999999985
  episodes_this_iter: 4
  episodes_total: 292
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007812499999999998
          cur_lr: 5.000000000000001e-05
          entropy: 0.7129866712623172
          entropy_coeff: 0.009999999999999998
          kl: 0.0029825534614786875
          policy_loss: -0.025014345058136517
          total_loss: -0.021234332356188032
          vf_explained_var: 0.2834034264087677
          vf_loss: 0.010907548924701081
    num_agent_steps_sampled: 90000
    num_agent_steps_trained: 90000
    num_steps_sampled: 90000
    num_steps_trained: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,90,2739.62,90000,-2.4173,-2.03,-2.76,241.73


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 91000
  custom_metrics: {}
  date: 2021-10-24_14-07-55
  done: false
  episode_len_mean: 240.4
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.403999999999993
  episode_reward_min: -2.759999999999985
  episodes_this_iter: 5
  episodes_total: 297
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003906249999999999
          cur_lr: 5.000000000000001e-05
          entropy: 0.6756083879205915
          entropy_coeff: 0.009999999999999998
          kl: 0.002695597678833571
          policy_loss: -0.005620761919352743
          total_loss: 0.0035907172080543305
          vf_explained_var: 0.10129614919424057
          vf_loss: 0.015966512604306142
    num_agent_steps_sampled: 91000
    num_agent_steps_trained: 91000
    num_steps_sampled: 91000
    num_steps_trained: 91

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,91,2775.45,91000,-2.404,-2.03,-2.76,240.4




Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 92000
  custom_metrics: {}
  date: 2021-10-24_14-08-46
  done: false
  episode_len_mean: 239.12
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.3911999999999924
  episode_reward_min: -2.759999999999985
  episodes_this_iter: 4
  episodes_total: 301
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00019531249999999996
          cur_lr: 5.000000000000001e-05
          entropy: 0.6503707746664683
          entropy_coeff: 0.009999999999999998
          kl: 0.0035408857120272173
          policy_loss: -0.07380921931730376
          total_loss: -0.06772244738207923
          vf_explained_var: 0.08067691326141357
          vf_loss: 0.012589786325891812
    num_agent_steps_sampled: 92000
    num_agent_steps_trained: 92000
    num_steps_sampled: 92000
    num_steps_trained: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,92,2827.28,92000,-2.3912,-2.01,-2.76,239.12


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 93000
  custom_metrics: {}
  date: 2021-10-24_14-09-22
  done: false
  episode_len_mean: 237.92
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.3791999999999933
  episode_reward_min: -2.759999999999985
  episodes_this_iter: 5
  episodes_total: 306
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.765624999999998e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.7352458165751563
          entropy_coeff: 0.009999999999999998
          kl: 0.006941747074102977
          policy_loss: -0.043642450124025345
          total_loss: -0.03459905766778522
          vf_explained_var: 0.1115916445851326
          vf_loss: 0.016395170593427287
    num_agent_steps_sampled: 93000
    num_agent_steps_trained: 93000
    num_steps_sampled: 93000
    num_steps_trained: 930

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,93,2862.52,93000,-2.3792,-2.01,-2.76,237.92


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 94000
  custom_metrics: {}
  date: 2021-10-24_14-09-58
  done: false
  episode_len_mean: 236.86
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.368599999999993
  episode_reward_min: -2.759999999999985
  episodes_this_iter: 4
  episodes_total: 310
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.765624999999998e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.6501571271154616
          entropy_coeff: 0.009999999999999998
          kl: 0.005067001579162265
          policy_loss: 0.00752199747496181
          total_loss: 0.012611712846491072
          vf_explained_var: 0.2024020254611969
          vf_loss: 0.011590795105116234
    num_agent_steps_sampled: 94000
    num_agent_steps_trained: 94000
    num_steps_sampled: 94000
    num_steps_trained: 94000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,94,2899.25,94000,-2.3686,-2.01,-2.76,236.86


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 95000
  custom_metrics: {}
  date: 2021-10-24_14-10-34
  done: false
  episode_len_mean: 234.83
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.348299999999994
  episode_reward_min: -2.719999999999986
  episodes_this_iter: 5
  episodes_total: 315
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.765624999999998e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.5255890574720171
          entropy_coeff: 0.009999999999999998
          kl: 0.003033218959904755
          policy_loss: 0.007795814507537418
          total_loss: 0.016725203312105603
          vf_explained_var: 0.15644387900829315
          vf_loss: 0.014184985775500536
    num_agent_steps_sampled: 95000
    num_agent_steps_trained: 95000
    num_steps_sampled: 95000
    num_steps_trained: 9500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,95,2935.19,95000,-2.3483,-2.01,-2.72,234.83


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 96000
  custom_metrics: {}
  date: 2021-10-24_14-11-11
  done: false
  episode_len_mean: 233.22
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.3321999999999945
  episode_reward_min: -2.579999999999989
  episodes_this_iter: 4
  episodes_total: 319
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.882812499999999e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.3999578419658873
          entropy_coeff: 0.009999999999999998
          kl: 0.0041667050031701965
          policy_loss: 0.006040275428030226
          total_loss: 0.012379464589887194
          vf_explained_var: 0.17526651918888092
          vf_loss: 0.010338567776812448
    num_agent_steps_sampled: 96000
    num_agent_steps_trained: 96000
    num_steps_sampled: 96000
    num_steps_trained: 96

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,96,2971.87,96000,-2.3322,-2.01,-2.58,233.22


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 97000
  custom_metrics: {}
  date: 2021-10-24_14-11-47
  done: false
  episode_len_mean: 232.16
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.3215999999999943
  episode_reward_min: -2.579999999999989
  episodes_this_iter: 5
  episodes_total: 324
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4414062499999995e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.6230243583520253
          entropy_coeff: 0.009999999999999998
          kl: 0.07356970500118515
          policy_loss: -0.008097355647219553
          total_loss: 0.003571563959121704
          vf_explained_var: 0.055365897715091705
          vf_loss: 0.01789736787064208
    num_agent_steps_sampled: 97000
    num_agent_steps_trained: 97000
    num_steps_sampled: 97000
    num_steps_trained: 97

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,97,3007.43,97000,-2.3216,-2.01,-2.58,232.16


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 98000
  custom_metrics: {}
  date: 2021-10-24_14-12-23
  done: false
  episode_len_mean: 231.52
  episode_media: {}
  episode_reward_max: -2.010000000000001
  episode_reward_mean: -2.3151999999999946
  episode_reward_min: -2.579999999999989
  episodes_this_iter: 4
  episodes_total: 328
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.662109375e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.5386399855216344
          entropy_coeff: 0.009999999999999998
          kl: 0.006972965296538192
          policy_loss: 0.023830982132090464
          total_loss: 0.031007749173376294
          vf_explained_var: 0.13894133269786835
          vf_loss: 0.012562912278291252
    num_agent_steps_sampled: 98000
    num_agent_steps_trained: 98000
    num_steps_sampled: 98000
    num_steps_trained: 98000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,98,3043.46,98000,-2.3152,-2.01,-2.58,231.52




Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 99000
  custom_metrics: {}
  date: 2021-10-24_14-13-14
  done: false
  episode_len_mean: 230.58
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.3057999999999947
  episode_reward_min: -2.579999999999989
  episodes_this_iter: 5
  episodes_total: 333
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.662109375e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.6529193096690707
          entropy_coeff: 0.009999999999999998
          kl: 0.01852685574078434
          policy_loss: -0.034465215769078995
          total_loss: -0.024184731642405193
          vf_explained_var: 0.15444035828113556
          vf_loss: 0.016809003427624702
    num_agent_steps_sampled: 99000
    num_agent_steps_trained: 99000
    num_steps_sampled: 99000
    num_steps_trained: 99000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,99,3094.72,99000,-2.3058,-1.93,-2.58,230.58


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 100000
  custom_metrics: {}
  date: 2021-10-24_14-13-51
  done: false
  episode_len_mean: 229.76
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.2975999999999948
  episode_reward_min: -2.579999999999989
  episodes_this_iter: 4
  episodes_total: 337
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.662109375e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.5602583017614152
          entropy_coeff: 0.009999999999999998
          kl: 0.0032486958469500377
          policy_loss: 0.03156962738268905
          total_loss: 0.03843191812435786
          vf_explained_var: 0.15803495049476624
          vf_loss: 0.012464754210991993
    num_agent_steps_sampled: 100000
    num_agent_steps_trained: 100000
    num_steps_sampled: 100000
    num_steps_trained: 10000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,100,3131.6,100000,-2.2976,-1.93,-2.58,229.76


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 101000
  custom_metrics: {}
  date: 2021-10-24_14-14-28
  done: false
  episode_len_mean: 229.06
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.290599999999995
  episode_reward_min: -2.53999999999999
  episodes_this_iter: 4
  episodes_total: 341
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8310546875e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.5495842927032046
          entropy_coeff: 0.009999999999999998
          kl: 0.003935811847607182
          policy_loss: -0.032523603902922735
          total_loss: -0.025541627324289745
          vf_explained_var: 0.14634329080581665
          vf_loss: 0.012477751014133294
    num_agent_steps_sampled: 101000
    num_agent_steps_trained: 101000
    num_steps_sampled: 101000
    num_steps_trained: 101

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,101,3169.13,101000,-2.2906,-1.93,-2.54,229.06


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 102000
  custom_metrics: {}
  date: 2021-10-24_14-15-05
  done: false
  episode_len_mean: 228.12
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.281199999999995
  episode_reward_min: -2.53999999999999
  episodes_this_iter: 5
  episodes_total: 346
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.1552734375e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.5153044525119993
          entropy_coeff: 0.009999999999999998
          kl: 0.004605702757578298
          policy_loss: -0.0023487175504366556
          total_loss: 0.0055538120369116465
          vf_explained_var: 0.25401973724365234
          vf_loss: 0.013055535550746653
    num_agent_steps_sampled: 102000
    num_agent_steps_trained: 102000
    num_steps_sampled: 102000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,102,3205.14,102000,-2.2812,-1.93,-2.54,228.12


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 103000
  custom_metrics: {}
  date: 2021-10-24_14-15-42
  done: false
  episode_len_mean: 227.51
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.2750999999999952
  episode_reward_min: -2.53999999999999
  episodes_this_iter: 4
  episodes_total: 350
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.57763671875e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.41337089737256366
          entropy_coeff: 0.009999999999999998
          kl: 0.003396988532157942
          policy_loss: 0.007935110645161735
          total_loss: 0.014442344547973739
          vf_explained_var: 0.18229785561561584
          vf_loss: 0.010640930084304678
    num_agent_steps_sampled: 103000
    num_agent_steps_trained: 103000
    num_steps_sampled: 103000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,103,3242.56,103000,-2.2751,-1.93,-2.54,227.51


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 104000
  custom_metrics: {}
  date: 2021-10-24_14-16-19
  done: false
  episode_len_mean: 227.08
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.2707999999999955
  episode_reward_min: -2.53999999999999
  episodes_this_iter: 5
  episodes_total: 355
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.288818359375e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.6151476717657514
          entropy_coeff: 0.009999999999999998
          kl: 0.004541725390428929
          policy_loss: -0.012216410040855408
          total_loss: -0.0027110187543763053
          vf_explained_var: 0.15242302417755127
          vf_loss: 0.015656862325138517
    num_agent_steps_sampled: 104000
    num_agent_steps_trained: 104000
    num_steps_sampled: 104000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,104,3280.03,104000,-2.2708,-1.93,-2.54,227.08


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 105000
  custom_metrics: {}
  date: 2021-10-24_14-16-56
  done: false
  episode_len_mean: 226.73
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.267299999999995
  episode_reward_min: -2.53999999999999
  episodes_this_iter: 4
  episodes_total: 359
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1444091796875e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.4582119915220473
          entropy_coeff: 0.009999999999999998
          kl: 0.008836527197748802
          policy_loss: 0.017805405457814536
          total_loss: 0.025616566671265495
          vf_explained_var: 0.0648215264081955
          vf_loss: 0.012393271302183468
    num_agent_steps_sampled: 105000
    num_agent_steps_trained: 105000
    num_steps_sampled: 105000
    num_steps_trained: 105

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,105,3316.71,105000,-2.2673,-1.93,-2.54,226.73




Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 106000
  custom_metrics: {}
  date: 2021-10-24_14-17-50
  done: false
  episode_len_mean: 226.12
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.2611999999999957
  episode_reward_min: -2.53999999999999
  episodes_this_iter: 5
  episodes_total: 364
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1444091796875e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.6365479177898831
          entropy_coeff: 0.009999999999999998
          kl: 0.007877317884222634
          policy_loss: -0.006320915950669183
          total_loss: 0.004606870727406608
          vf_explained_var: 0.07436185330152512
          vf_loss: 0.01729325961528553
    num_agent_steps_sampled: 106000
    num_agent_steps_trained: 106000
    num_steps_sampled: 106000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,106,3370.47,106000,-2.2612,-1.93,-2.54,226.12


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 107000
  custom_metrics: {}
  date: 2021-10-24_14-18-24
  done: false
  episode_len_mean: 225.81
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.2580999999999953
  episode_reward_min: -2.53999999999999
  episodes_this_iter: 4
  episodes_total: 368
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1444091796875e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.43379965358310274
          entropy_coeff: 0.009999999999999998
          kl: 0.0024357337421389187
          policy_loss: 0.026795685787995658
          total_loss: 0.03491421358452903
          vf_explained_var: 0.053023580461740494
          vf_loss: 0.012456523192425569
    num_agent_steps_sampled: 107000
    num_agent_steps_trained: 107000
    num_steps_sampled: 107000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,107,3404.88,107000,-2.2581,-1.93,-2.54,225.81


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 108000
  custom_metrics: {}
  date: 2021-10-24_14-19-00
  done: false
  episode_len_mean: 225.6
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.255999999999996
  episode_reward_min: -2.53999999999999
  episodes_this_iter: 5
  episodes_total: 373
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.7220458984375e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.49564829071362815
          entropy_coeff: 0.009999999999999998
          kl: 0.0030646130619105608
          policy_loss: -0.01878499537706375
          total_loss: -0.007087736576795578
          vf_explained_var: 0.0735543891787529
          vf_loss: 0.01665374135805501
    num_agent_steps_sampled: 108000
    num_agent_steps_trained: 108000
    num_steps_sampled: 108000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,108,3440.92,108000,-2.256,-1.93,-2.54,225.6


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 109000
  custom_metrics: {}
  date: 2021-10-24_14-19-37
  done: false
  episode_len_mean: 225.26
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.2525999999999957
  episode_reward_min: -2.53999999999999
  episodes_this_iter: 4
  episodes_total: 377
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.86102294921875e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.5081882764895757
          entropy_coeff: 0.009999999999999998
          kl: 0.003426184891495391
          policy_loss: 0.02621337870756785
          total_loss: 0.033719621267583634
          vf_explained_var: 0.0949847623705864
          vf_loss: 0.012588125922613673
    num_agent_steps_sampled: 109000
    num_agent_steps_trained: 109000
    num_steps_sampled: 109000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,109,3477.29,109000,-2.2526,-1.93,-2.54,225.26


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 110000
  custom_metrics: {}
  date: 2021-10-24_14-20-13
  done: false
  episode_len_mean: 224.99
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.2498999999999962
  episode_reward_min: -2.53999999999999
  episodes_this_iter: 5
  episodes_total: 382
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.430511474609375e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.5957576387458378
          entropy_coeff: 0.009999999999999998
          kl: 0.004352790328791735
          policy_loss: -0.007871108750502268
          total_loss: 0.003484443575143814
          vf_explained_var: 0.05126935988664627
          vf_loss: 0.017313126598795255
    num_agent_steps_sampled: 110000
    num_agent_steps_trained: 110000
    num_steps_sampled: 110000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,110,3513.12,110000,-2.2499,-1.93,-2.54,224.99


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 111000
  custom_metrics: {}
  date: 2021-10-24_14-20-48
  done: false
  episode_len_mean: 224.86
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.2485999999999966
  episode_reward_min: -2.53999999999999
  episodes_this_iter: 4
  episodes_total: 386
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.152557373046875e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.7212026993433635
          entropy_coeff: 0.009999999999999998
          kl: 0.005156121253396072
          policy_loss: 0.015578163746330474
          total_loss: 0.021808903084860908
          vf_explained_var: 0.09067834913730621
          vf_loss: 0.013442763975924915
    num_agent_steps_sampled: 111000
    num_agent_steps_trained: 111000
    num_steps_sampled: 111000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,111,3548.2,111000,-2.2486,-1.93,-2.54,224.86




Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 112000
  custom_metrics: {}
  date: 2021-10-24_14-21-39
  done: false
  episode_len_mean: 224.63
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.246299999999996
  episode_reward_min: -2.579999999999989
  episodes_this_iter: 4
  episodes_total: 390
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.152557373046875e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.8581100516849094
          entropy_coeff: 0.009999999999999998
          kl: 0.005584556892321283
          policy_loss: -0.020282677312692007
          total_loss: -0.015145190722412534
          vf_explained_var: 0.09455060213804245
          vf_loss: 0.013718587170458502
    num_agent_steps_sampled: 112000
    num_agent_steps_trained: 112000
    num_steps_sampled: 112000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,112,3599.46,112000,-2.2463,-1.93,-2.58,224.63


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 113000
  custom_metrics: {}
  date: 2021-10-24_14-22-15
  done: false
  episode_len_mean: 225.09
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.2508999999999957
  episode_reward_min: -2.5899999999999888
  episodes_this_iter: 5
  episodes_total: 395
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.152557373046875e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.8979679233498044
          entropy_coeff: 0.009999999999999998
          kl: 0.013799669739704474
          policy_loss: -0.01547656249668863
          total_loss: -0.007259251756800545
          vf_explained_var: 0.15145809948444366
          vf_loss: 0.017196989887290532
    num_agent_steps_sampled: 113000
    num_agent_steps_trained: 113000
    num_steps_sampled: 113000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,113,3635.33,113000,-2.2509,-1.93,-2.59,225.09


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 114000
  custom_metrics: {}
  date: 2021-10-24_14-22-50
  done: false
  episode_len_mean: 225.49
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.2548999999999957
  episode_reward_min: -2.5899999999999888
  episodes_this_iter: 4
  episodes_total: 399
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.152557373046875e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.8169161286618974
          entropy_coeff: 0.009999999999999998
          kl: 0.004150703684132553
          policy_loss: 0.015615962673392561
          total_loss: 0.0201341077271435
          vf_explained_var: 0.2009023278951645
          vf_loss: 0.012687306250962945
    num_agent_steps_sampled: 114000
    num_agent_steps_trained: 114000
    num_steps_sampled: 114000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,114,3670.1,114000,-2.2549,-1.93,-2.59,225.49


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 115000
  custom_metrics: {}
  date: 2021-10-24_14-23-23
  done: false
  episode_len_mean: 226.1
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.2609999999999952
  episode_reward_min: -2.5899999999999888
  episodes_this_iter: 4
  episodes_total: 403
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5762786865234374e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.7450522091653612
          entropy_coeff: 0.009999999999999998
          kl: 0.005662928244764921
          policy_loss: 0.009028987255361345
          total_loss: 0.015129440401991209
          vf_explained_var: 0.09296313673257828
          vf_loss: 0.013550976208514637
    num_agent_steps_sampled: 115000
    num_agent_steps_trained: 115000
    num_steps_sampled: 115000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,115,3703.5,115000,-2.261,-1.93,-2.59,226.1


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 116000
  custom_metrics: {}
  date: 2021-10-24_14-23-59
  done: false
  episode_len_mean: 226.27
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.2626999999999953
  episode_reward_min: -2.5899999999999888
  episodes_this_iter: 4
  episodes_total: 407
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5762786865234374e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6312297191884783
          entropy_coeff: 0.009999999999999998
          kl: 0.004031243974887467
          policy_loss: -0.04250357043411997
          total_loss: -0.03517894256446097
          vf_explained_var: 0.11245667934417725
          vf_loss: 0.013636927575700813
    num_agent_steps_sampled: 116000
    num_agent_steps_trained: 116000
    num_steps_sampled: 116000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,116,3738.83,116000,-2.2627,-1.93,-2.59,226.27


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 117000
  custom_metrics: {}
  date: 2021-10-24_14-24-34
  done: false
  episode_len_mean: 226.57
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.265699999999995
  episode_reward_min: -2.5899999999999888
  episodes_this_iter: 5
  episodes_total: 412
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7881393432617187e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6707199652989705
          entropy_coeff: 0.009999999999999998
          kl: 0.003085861725271223
          policy_loss: -0.01470608553952641
          total_loss: -0.004117316835456424
          vf_explained_var: 0.09271086752414703
          vf_loss: 0.017295969298316373
    num_agent_steps_sampled: 117000
    num_agent_steps_trained: 117000
    num_steps_sampled: 117000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,117,3773.86,117000,-2.2657,-1.93,-2.59,226.57


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 118000
  custom_metrics: {}
  date: 2021-10-24_14-25-09
  done: false
  episode_len_mean: 226.73
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.2672999999999957
  episode_reward_min: -2.5899999999999888
  episodes_this_iter: 4
  episodes_total: 416
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.940696716308593e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.5386604819032881
          entropy_coeff: 0.009999999999999998
          kl: 0.003567569382182809
          policy_loss: 0.028861620194382136
          total_loss: 0.034323945807086094
          vf_explained_var: 0.11112948507070541
          vf_loss: 0.010848927130508754
    num_agent_steps_sampled: 118000
    num_agent_steps_trained: 118000
    num_steps_sampled: 118000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,118,3809.71,118000,-2.2673,-1.93,-2.59,226.73




Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 119000
  custom_metrics: {}
  date: 2021-10-24_14-26-03
  done: false
  episode_len_mean: 226.9
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.2689999999999957
  episode_reward_min: -2.5899999999999888
  episodes_this_iter: 5
  episodes_total: 421
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.470348358154297e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.6521365529961056
          entropy_coeff: 0.009999999999999998
          kl: 0.005265877956954136
          policy_loss: -0.022585116244024702
          total_loss: -0.010123679372999404
          vf_explained_var: 0.06916885823011398
          vf_loss: 0.01898280143116911
    num_agent_steps_sampled: 119000
    num_agent_steps_trained: 119000
    num_steps_sampled: 119000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,119,3863.68,119000,-2.269,-1.93,-2.59,226.9


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 120000
  custom_metrics: {}
  date: 2021-10-24_14-26-37
  done: false
  episode_len_mean: 227.58
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.2757999999999954
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 425
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.470348358154297e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9586391501956516
          entropy_coeff: 0.009999999999999998
          kl: 0.009692963810608843
          policy_loss: 0.01288099918100569
          total_loss: 0.01725533397661315
          vf_explained_var: 0.08545073866844177
          vf_loss: 0.01396072506904602
    num_agent_steps_sampled: 120000
    num_agent_steps_trained: 120000
    num_steps_sampled: 120000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,120,3897.58,120000,-2.2758,-1.93,-2.85,227.58


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 121000
  custom_metrics: {}
  date: 2021-10-24_14-27-12
  done: false
  episode_len_mean: 227.81
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.2780999999999954
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 429
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.470348358154297e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.6427232417795393
          entropy_coeff: 0.009999999999999998
          kl: 0.0042301514468
          policy_loss: 0.024696239166789584
          total_loss: 0.03196198874049717
          vf_explained_var: 0.1254882514476776
          vf_loss: 0.013692979286942217
    num_agent_steps_sampled: 121000
    num_agent_steps_trained: 121000
    num_steps_sampled: 121000
    num_steps_trained: 12100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,121,3932.48,121000,-2.2781,-1.93,-2.85,227.81


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 122000
  custom_metrics: {}
  date: 2021-10-24_14-27-47
  done: false
  episode_len_mean: 228.51
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.2850999999999955
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 433
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2351741790771484e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.7348436103926764
          entropy_coeff: 0.009999999999999998
          kl: 0.0063005875488492
          policy_loss: -0.0012662548157903882
          total_loss: 0.005855242411295573
          vf_explained_var: 0.06923781335353851
          vf_loss: 0.014469935124119123
    num_agent_steps_sampled: 122000
    num_agent_steps_trained: 122000
    num_steps_sampled: 122000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,122,3966.79,122000,-2.2851,-1.93,-2.85,228.51


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 123000
  custom_metrics: {}
  date: 2021-10-24_14-28-19
  done: false
  episode_len_mean: 228.99
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.289899999999995
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 437
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2351741790771484e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.7426974885993534
          entropy_coeff: 0.009999999999999998
          kl: 0.0048835016481360485
          policy_loss: -0.04355158996250894
          total_loss: -0.036369036303626166
          vf_explained_var: 0.10556825995445251
          vf_loss: 0.014609524483482043
    num_agent_steps_sampled: 123000
    num_agent_steps_trained: 123000
    num_steps_sampled: 123000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,123,3999.41,123000,-2.2899,-1.93,-2.85,228.99


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 124000
  custom_metrics: {}
  date: 2021-10-24_14-28-54
  done: false
  episode_len_mean: 229.21
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.292099999999995
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 5
  episodes_total: 442
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1175870895385742e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.6124577826923794
          entropy_coeff: 0.009999999999999998
          kl: 0.0023363238699609214
          policy_loss: -0.004861614770359463
          total_loss: 0.006208226250277625
          vf_explained_var: 0.12751153111457825
          vf_loss: 0.017194417708863815
    num_agent_steps_sampled: 124000
    num_agent_steps_trained: 124000
    num_steps_sampled: 124000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,124,4033.68,124000,-2.2921,-1.93,-2.85,229.21


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 125000
  custom_metrics: {}
  date: 2021-10-24_14-29-31
  done: false
  episode_len_mean: 229.54
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.295399999999995
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 446
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.587935447692871e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.5525793800751369
          entropy_coeff: 0.009999999999999998
          kl: 0.0026193519137526285
          policy_loss: 0.018967998110585742
          total_loss: 0.02700096012817489
          vf_explained_var: 0.10669311136007309
          vf_loss: 0.013558752990017334
    num_agent_steps_sampled: 125000
    num_agent_steps_trained: 125000
    num_steps_sampled: 125000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,125,4070.76,125000,-2.2954,-1.93,-2.85,229.54




Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 126000
  custom_metrics: {}
  date: 2021-10-24_14-30-28
  done: false
  episode_len_mean: 229.39
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.293899999999995
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 5
  episodes_total: 451
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7939677238464354e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.5039425472418467
          entropy_coeff: 0.009999999999999998
          kl: 0.0029796943016980907
          policy_loss: -0.021350785262054868
          total_loss: -0.008979235920641157
          vf_explained_var: 0.12019248306751251
          vf_loss: 0.017410974421848854
    num_agent_steps_sampled: 126000
    num_agent_steps_trained: 126000
    num_steps_sampled: 126000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,126,4128.24,126000,-2.2939,-1.93,-2.85,229.39


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 127000
  custom_metrics: {}
  date: 2021-10-24_14-31-07
  done: false
  episode_len_mean: 229.84
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.298399999999995
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 455
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3969838619232177e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.7926800711287393
          entropy_coeff: 0.009999999999999998
          kl: 0.005048125683541116
          policy_loss: 0.021090817948182423
          total_loss: 0.026956383056110807
          vf_explained_var: 0.10323823988437653
          vf_loss: 0.013792368645469347
    num_agent_steps_sampled: 127000
    num_agent_steps_trained: 127000
    num_steps_sampled: 127000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,127,4167.34,127000,-2.2984,-1.93,-2.85,229.84


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 128000
  custom_metrics: {}
  date: 2021-10-24_14-31-48
  done: false
  episode_len_mean: 230.05
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.3004999999999947
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 459
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3969838619232177e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.668900438480907
          entropy_coeff: 0.009999999999999998
          kl: 0.002763907532524949
          policy_loss: 0.026127682998776436
          total_loss: 0.032627915632393624
          vf_explained_var: 0.0674629956483841
          vf_loss: 0.013189236478259167
    num_agent_steps_sampled: 128000
    num_agent_steps_trained: 128000
    num_steps_sampled: 128000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,128,4207.8,128000,-2.3005,-1.93,-2.85,230.05


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 129000
  custom_metrics: {}
  date: 2021-10-24_14-32-24
  done: false
  episode_len_mean: 230.55
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.3054999999999946
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 5
  episodes_total: 464
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.984919309616089e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.5414771907859378
          entropy_coeff: 0.009999999999999998
          kl: 0.00397567713296654
          policy_loss: -0.03489278223779466
          total_loss: -0.022726591842042076
          vf_explained_var: 0.12443017959594727
          vf_loss: 0.017580961135940418
    num_agent_steps_sampled: 129000
    num_agent_steps_trained: 129000
    num_steps_sampled: 129000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,129,4243.88,129000,-2.3055,-1.93,-2.85,230.55


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 130000
  custom_metrics: {}
  date: 2021-10-24_14-33-02
  done: false
  episode_len_mean: 230.87
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.3086999999999946
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 468
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.492459654808044e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.6642578197850122
          entropy_coeff: 0.009999999999999998
          kl: 0.008748047371221269
          policy_loss: 0.028383769177728228
          total_loss: 0.03589715626504686
          vf_explained_var: 0.10622832924127579
          vf_loss: 0.014155967067927122
    num_agent_steps_sampled: 130000
    num_agent_steps_trained: 130000
    num_steps_sampled: 130000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,130,4282.2,130000,-2.3087,-1.93,-2.85,230.87


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 131000
  custom_metrics: {}
  date: 2021-10-24_14-33-38
  done: false
  episode_len_mean: 231.46
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.314599999999994
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 472
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.492459654808044e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.6012796954976187
          entropy_coeff: 0.009999999999999998
          kl: 0.006520568633334594
          policy_loss: 0.024470579541391795
          total_loss: 0.03183350082900789
          vf_explained_var: 0.1390974223613739
          vf_loss: 0.013375717680901289
    num_agent_steps_sampled: 131000
    num_agent_steps_trained: 131000
    num_steps_sampled: 131000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,131,4318,131000,-2.3146,-1.93,-2.85,231.46


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 132000
  custom_metrics: {}
  date: 2021-10-24_14-34-12
  done: false
  episode_len_mean: 231.35
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.313499999999995
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 5
  episodes_total: 477
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.492459654808044e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.3881260573863983
          entropy_coeff: 0.009999999999999998
          kl: 0.0014302081797085166
          policy_loss: -0.010243340830008189
          total_loss: 0.003332324243254132
          vf_explained_var: 0.0906200110912323
          vf_loss: 0.017456925339582895
    num_agent_steps_sampled: 132000
    num_agent_steps_trained: 132000
    num_steps_sampled: 132000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,132,4352.23,132000,-2.3135,-1.93,-2.85,231.35




Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 133000
  custom_metrics: {}
  date: 2021-10-24_14-35-07
  done: false
  episode_len_mean: 231.02
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.3101999999999943
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 481
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.746229827404022e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.3912694540288713
          entropy_coeff: 0.009999999999999998
          kl: 0.0025197066569439307
          policy_loss: 0.03938919934961531
          total_loss: 0.04906545811229282
          vf_explained_var: 0.05556079000234604
          vf_loss: 0.013588954860137569
    num_agent_steps_sampled: 133000
    num_agent_steps_trained: 133000
    num_steps_sampled: 133000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,133,4407.03,133000,-2.3102,-1.93,-2.85,231.02


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 134000
  custom_metrics: {}
  date: 2021-10-24_14-35-44
  done: false
  episode_len_mean: 231.21
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.3120999999999943
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 485
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.73114913702011e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.43531002120839224
          entropy_coeff: 0.009999999999999998
          kl: 0.018737291256191252
          policy_loss: -0.047482193923658794
          total_loss: -0.034203535980648465
          vf_explained_var: 0.018661221489310265
          vf_loss: 0.0176317579837309
    num_agent_steps_sampled: 134000
    num_agent_steps_trained: 134000
    num_steps_sampled: 134000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,134,4443.49,134000,-2.3121,-1.93,-2.85,231.21


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 135000
  custom_metrics: {}
  date: 2021-10-24_14-36-16
  done: false
  episode_len_mean: 230.87
  episode_media: {}
  episode_reward_max: -1.9300000000000015
  episode_reward_mean: -2.308699999999994
  episode_reward_min: -2.849999999999983
  episodes_this_iter: 4
  episodes_total: 489
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.73114913702011e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.6541353589958615
          entropy_coeff: 0.009999999999999998
          kl: 0.012117441700077178
          policy_loss: -0.11389175785912407
          total_loss: -0.10334316632813878
          vf_explained_var: 0.0860644057393074
          vf_loss: 0.017089945264160633
    num_agent_steps_sampled: 135000
    num_agent_steps_trained: 135000
    num_steps_sampled: 135000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,135,4476.32,135000,-2.3087,-1.93,-2.85,230.87


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 136000
  custom_metrics: {}
  date: 2021-10-24_14-36-44
  done: false
  episode_len_mean: 234.16
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.3415999999999944
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 493
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.73114913702011e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.565693978468577
          entropy_coeff: 0.009999999999999998
          kl: 0.018272887623878256
          policy_loss: 0.0018837882412804498
          total_loss: 0.0017498183581564162
          vf_explained_var: 0.11554254591464996
          vf_loss: 0.015522970052229034
    num_agent_steps_sampled: 136000
    num_agent_steps_trained: 136000
    num_steps_sampled: 136000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,136,4503.99,136000,-2.3416,-1.96,-3.4,234.16


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 137000
  custom_metrics: {}
  date: 2021-10-24_14-37-17
  done: false
  episode_len_mean: 235.09
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.350899999999994
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 497
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.73114913702011e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.9773135046164195
          entropy_coeff: 0.009999999999999998
          kl: 0.006798189185501732
          policy_loss: 7.62972566816542e-05
          total_loss: 0.004925903264019225
          vf_explained_var: 0.1934642344713211
          vf_loss: 0.01462274023021261
    num_agent_steps_sampled: 137000
    num_agent_steps_trained: 137000
    num_steps_sampled: 137000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,137,4536.83,137000,-2.3509,-1.96,-3.4,235.09


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 138000
  custom_metrics: {}
  date: 2021-10-24_14-37-49
  done: false
  episode_len_mean: 236.01
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.360099999999994
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 3
  episodes_total: 500
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.73114913702011e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.0349299165937635
          entropy_coeff: 0.009999999999999998
          kl: 0.006816661002614586
          policy_loss: -0.08825313730372322
          total_loss: -0.08472214879261122
          vf_explained_var: 0.2242291122674942
          vf_loss: 0.01388028697628114
    num_agent_steps_sampled: 138000
    num_agent_steps_trained: 138000
    num_steps_sampled: 138000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,138,4568.42,138000,-2.3601,-1.96,-3.4,236.01


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 139000
  custom_metrics: {}
  date: 2021-10-24_14-38-20
  done: false
  episode_len_mean: 237.31
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.3730999999999933
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 504
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.73114913702011e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.008568126625485
          entropy_coeff: 0.009999999999999998
          kl: 0.004463800990215776
          policy_loss: -0.02059173492921723
          total_loss: -0.01743556418352657
          vf_explained_var: 0.3152037262916565
          vf_loss: 0.013241853182100588
    num_agent_steps_sampled: 139000
    num_agent_steps_trained: 139000
    num_steps_sampled: 139000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,139,4600.01,139000,-2.3731,-1.96,-3.4,237.31


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 140000
  custom_metrics: {}
  date: 2021-10-24_14-38-53
  done: false
  episode_len_mean: 238.77
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.3876999999999926
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 508
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.365574568510055e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.0216674970255957
          entropy_coeff: 0.009999999999999998
          kl: 0.013336360826512106
          policy_loss: 0.026704002958205013
          total_loss: 0.030315399997764163
          vf_explained_var: 0.23518376052379608
          vf_loss: 0.013828069996088743
    num_agent_steps_sampled: 140000
    num_agent_steps_trained: 140000
    num_steps_sampled: 140000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,140,4632.4,140000,-2.3877,-1.96,-3.4,238.77




Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 141000
  custom_metrics: {}
  date: 2021-10-24_14-39-45
  done: false
  episode_len_mean: 239.47
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.3946999999999927
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 512
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.365574568510055e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.8462322003311581
          entropy_coeff: 0.009999999999999998
          kl: 0.004276585184977301
          policy_loss: 0.030944175438748465
          total_loss: 0.03648729572693507
          vf_explained_var: 0.20154988765716553
          vf_loss: 0.014005441084090206
    num_agent_steps_sampled: 141000
    num_agent_steps_trained: 141000
    num_steps_sampled: 141000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,141,4684.61,141000,-2.3947,-1.96,-3.4,239.47


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 142000
  custom_metrics: {}
  date: 2021-10-24_14-40-20
  done: false
  episode_len_mean: 240.22
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.4021999999999926
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 516
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1827872842550277e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.6732130249341329
          entropy_coeff: 0.009999999999999998
          kl: 0.005322478179883285
          policy_loss: 0.028300877577728697
          total_loss: 0.035081624570820064
          vf_explained_var: 0.11457231640815735
          vf_loss: 0.01351287824412187
    num_agent_steps_sampled: 142000
    num_agent_steps_trained: 142000
    num_steps_sampled: 142000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,142,4719.28,142000,-2.4022,-1.96,-3.4,240.22


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 143000
  custom_metrics: {}
  date: 2021-10-24_14-40-56
  done: false
  episode_len_mean: 240.88
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.4087999999999927
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 520
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1827872842550277e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.5591182129250633
          entropy_coeff: 0.009999999999999998
          kl: 0.0033434058757916
          policy_loss: -0.0067255212201012505
          total_loss: 0.001283343177702692
          vf_explained_var: 0.12106984108686447
          vf_loss: 0.013600045546061463
    num_agent_steps_sampled: 143000
    num_agent_steps_trained: 143000
    num_steps_sampled: 143000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,143,4755.34,143000,-2.4088,-1.96,-3.4,240.88


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 144000
  custom_metrics: {}
  date: 2021-10-24_14-41-28
  done: false
  episode_len_mean: 241.19
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.4118999999999926
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 524
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0913936421275138e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.8338230351607004
          entropy_coeff: 0.009999999999999998
          kl: 0.005182702114390104
          policy_loss: -0.0035704692204793294
          total_loss: 0.002188854126466645
          vf_explained_var: 0.14845393598079681
          vf_loss: 0.014097551359898514
    num_agent_steps_sampled: 144000
    num_agent_steps_trained: 144000
    num_steps_sampled: 144000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,144,4787.38,144000,-2.4119,-1.96,-3.4,241.19


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 145000
  custom_metrics: {}
  date: 2021-10-24_14-42-02
  done: false
  episode_len_mean: 241.34
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.413399999999992
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 5
  episodes_total: 529
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0913936421275138e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.5172797958056132
          entropy_coeff: 0.009999999999999998
          kl: 0.001616553089491567
          policy_loss: -0.019092321230305565
          total_loss: -0.0066483815511067705
          vf_explained_var: 0.1119462251663208
          vf_loss: 0.01761673592651884
    num_agent_steps_sampled: 145000
    num_agent_steps_trained: 145000
    num_steps_sampled: 145000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,145,4822.02,145000,-2.4134,-1.96,-3.4,241.34


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 146000
  custom_metrics: {}
  date: 2021-10-24_14-42-38
  done: false
  episode_len_mean: 241.24
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.4123999999999923
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 533
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.456968210637569e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.7898201353020138
          entropy_coeff: 0.009999999999999998
          kl: 0.0030677765639997765
          policy_loss: 0.009584531767500772
          total_loss: 0.014577784968747034
          vf_explained_var: 0.1495211124420166
          vf_loss: 0.012891455356859498
    num_agent_steps_sampled: 146000
    num_agent_steps_trained: 146000
    num_steps_sampled: 146000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,146,4857.21,146000,-2.4124,-1.96,-3.4,241.24


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 147000
  custom_metrics: {}
  date: 2021-10-24_14-43-12
  done: false
  episode_len_mean: 241.21
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.412099999999992
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 537
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7284841053187846e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.6795257601473067
          entropy_coeff: 0.009999999999999998
          kl: 0.0030212543664962115
          policy_loss: 0.014749289386802249
          total_loss: 0.021024352974361844
          vf_explained_var: 0.13725654780864716
          vf_loss: 0.013070320203486417
    num_agent_steps_sampled: 147000
    num_agent_steps_trained: 147000
    num_steps_sampled: 147000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,147,4891.88,147000,-2.4121,-1.96,-3.4,241.21




Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 148000
  custom_metrics: {}
  date: 2021-10-24_14-44-04
  done: false
  episode_len_mean: 241.32
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.4131999999999927
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 541
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3642420526593923e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.7390493750572205
          entropy_coeff: 0.009999999999999998
          kl: 0.005982513124436492
          policy_loss: 0.012603153785069784
          total_loss: 0.01830065536002318
          vf_explained_var: 0.1769164353609085
          vf_loss: 0.013087995412449042
    num_agent_steps_sampled: 148000
    num_agent_steps_trained: 148000
    num_steps_sampled: 148000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,148,4943.75,148000,-2.4132,-1.96,-3.4,241.32


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 149000
  custom_metrics: {}
  date: 2021-10-24_14-44-36
  done: false
  episode_len_mean: 241.57
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.4156999999999926
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 545
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3642420526593923e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.6073363025983175
          entropy_coeff: 0.009999999999999998
          kl: 0.0018336379311176326
          policy_loss: -0.025709973441229925
          total_loss: -0.01934330008096165
          vf_explained_var: 0.17595909535884857
          vf_loss: 0.012440038824246989
    num_agent_steps_sampled: 149000
    num_agent_steps_trained: 149000
    num_steps_sampled: 149000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,149,4975.97,149000,-2.4157,-1.96,-3.4,241.57


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 150000
  custom_metrics: {}
  date: 2021-10-24_14-45-11
  done: false
  episode_len_mean: 241.95
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.419499999999992
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 5
  episodes_total: 550
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.821210263296962e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.6835529989666409
          entropy_coeff: 0.009999999999999998
          kl: 0.0030416299324991136
          policy_loss: 0.0008695341646671295
          total_loss: 0.010097421043448978
          vf_explained_var: 0.14701497554779053
          vf_loss: 0.016063414834853677
    num_agent_steps_sampled: 150000
    num_agent_steps_trained: 150000
    num_steps_sampled: 150000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,150,5010.49,150000,-2.4195,-1.96,-3.4,241.95


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 151000
  custom_metrics: {}
  date: 2021-10-24_14-45-46
  done: false
  episode_len_mean: 241.78
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.417799999999992
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 554
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.410605131648481e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.637831738922331
          entropy_coeff: 0.009999999999999998
          kl: 0.003003049307680941
          policy_loss: 0.0529499802324507
          total_loss: 0.05802251216438081
          vf_explained_var: 0.11312811821699142
          vf_loss: 0.011450850285796656
    num_agent_steps_sampled: 151000
    num_agent_steps_trained: 151000
    num_steps_sampled: 151000
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,151,5045.46,151000,-2.4178,-1.96,-3.4,241.78


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 152000
  custom_metrics: {}
  date: 2021-10-24_14-46-21
  done: false
  episode_len_mean: 242.02
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.420199999999993
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 558
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7053025658242404e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.6336153321795993
          entropy_coeff: 0.009999999999999998
          kl: 0.003363290944544575
          policy_loss: -0.028065955059395895
          total_loss: -0.021494544711377885
          vf_explained_var: 0.12964622676372528
          vf_loss: 0.012907565664499998
    num_agent_steps_sampled: 152000
    num_agent_steps_trained: 152000
    num_steps_sampled: 152000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,152,5080.42,152000,-2.4202,-1.96,-3.4,242.02


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 153000
  custom_metrics: {}
  date: 2021-10-24_14-46-53
  done: false
  episode_len_mean: 242.61
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.4260999999999924
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 562
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.526512829121202e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.8368986421161227
          entropy_coeff: 0.009999999999999998
          kl: 0.00926599759395717
          policy_loss: -0.019705679515997568
          total_loss: -0.01269449761344327
          vf_explained_var: 0.048487111926078796
          vf_loss: 0.015380167412675089
    num_agent_steps_sampled: 153000
    num_agent_steps_trained: 153000
    num_steps_sampled: 153000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,153,5112.36,153000,-2.4261,-1.96,-3.4,242.61


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 154000
  custom_metrics: {}
  date: 2021-10-24_14-47-28
  done: false
  episode_len_mean: 243.29
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.432899999999992
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 566
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.526512829121202e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.5404697789086236
          entropy_coeff: 0.009999999999999998
          kl: 0.02496759911421991
          policy_loss: -0.07738174912002352
          total_loss: -0.06796436049044133
          vf_explained_var: 0.06194563955068588
          vf_loss: 0.014822086836728785
    num_agent_steps_sampled: 154000
    num_agent_steps_trained: 154000
    num_steps_sampled: 154000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,154,5147.62,154000,-2.4329,-1.96,-3.4,243.29




Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 155000
  custom_metrics: {}
  date: 2021-10-24_14-48-22
  done: false
  episode_len_mean: 242.89
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.428899999999992
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 5
  episodes_total: 571
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2789769243681808e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.6234940654701657
          entropy_coeff: 0.009999999999999998
          kl: 0.0025901594492874756
          policy_loss: 0.0038064224024613696
          total_loss: 0.013462632728947533
          vf_explained_var: 0.13391032814979553
          vf_loss: 0.015891149464166827
    num_agent_steps_sampled: 155000
    num_agent_steps_trained: 155000
    num_steps_sampled: 155000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,155,5201.4,155000,-2.4289,-1.96,-3.4,242.89


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 156000
  custom_metrics: {}
  date: 2021-10-24_14-48-58
  done: false
  episode_len_mean: 242.98
  episode_media: {}
  episode_reward_max: -1.9600000000000015
  episode_reward_mean: -2.429799999999992
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 575
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.394884621840904e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.7133997814522849
          entropy_coeff: 0.009999999999999998
          kl: 0.04741345521347063
          policy_loss: 0.02013859653638469
          total_loss: 0.023851570735375086
          vf_explained_var: 0.1415913701057434
          vf_loss: 0.010846972243032521
    num_agent_steps_sampled: 156000
    num_agent_steps_trained: 156000
    num_steps_sampled: 156000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,156,5237.53,156000,-2.4298,-1.96,-3.4,242.98


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 157000
  custom_metrics: {}
  date: 2021-10-24_14-49-31
  done: false
  episode_len_mean: 243.43
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.434299999999992
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 5
  episodes_total: 580
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.59232693276135e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.5490999397304323
          entropy_coeff: 0.009999999999999998
          kl: 0.002371281126289304
          policy_loss: -0.029687040133608712
          total_loss: -0.018415496912267473
          vf_explained_var: 0.09942352771759033
          vf_loss: 0.01676254292122192
    num_agent_steps_sampled: 157000
    num_agent_steps_trained: 157000
    num_steps_sampled: 157000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,157,5270,157000,-2.4343,-1.98,-3.4,243.43


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 158000
  custom_metrics: {}
  date: 2021-10-24_14-50-07
  done: false
  episode_len_mean: 243.22
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.432199999999992
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 584
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.796163466380675e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.525060929523574
          entropy_coeff: 0.009999999999999998
          kl: 0.004076071978959008
          policy_loss: 0.05260199076599545
          total_loss: 0.05716368117266231
          vf_explained_var: 0.1345278024673462
          vf_loss: 0.009812300943303853
    num_agent_steps_sampled: 158000
    num_agent_steps_trained: 158000
    num_steps_sampled: 158000
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,158,5306.57,158000,-2.4322,-1.98,-3.4,243.22


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 159000
  custom_metrics: {}
  date: 2021-10-24_14-50-45
  done: false
  episode_len_mean: 243.17
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.431699999999992
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 588
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.3980817331903375e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.44729905790752833
          entropy_coeff: 0.009999999999999998
          kl: 0.0034441308560055005
          policy_loss: -0.03742951816982693
          total_loss: -0.02839421522286203
          vf_explained_var: 0.0751173198223114
          vf_loss: 0.013508291573574145
    num_agent_steps_sampled: 159000
    num_agent_steps_trained: 159000
    num_steps_sampled: 159000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,159,5344.1,159000,-2.4317,-1.98,-3.4,243.17


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 160000
  custom_metrics: {}
  date: 2021-10-24_14-51-21
  done: false
  episode_len_mean: 239.54
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.395399999999993
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 5
  episodes_total: 593
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1990408665951687e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.4187665906217363
          entropy_coeff: 0.009999999999999998
          kl: 0.001488856633033701
          policy_loss: 0.011997706608639823
          total_loss: 0.023626134710179436
          vf_explained_var: 0.09007091820240021
          vf_loss: 0.015816092842982875
    num_agent_steps_sampled: 160000
    num_agent_steps_trained: 160000
    num_steps_sampled: 160000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,160,5380.23,160000,-2.3954,-1.98,-3.26,239.54


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 161000
  custom_metrics: {}
  date: 2021-10-24_14-51-57
  done: false
  episode_len_mean: 238.67
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.3866999999999927
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 4
  episodes_total: 597
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.995204332975844e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.4790285461478763
          entropy_coeff: 0.009999999999999998
          kl: 0.0029850101564080312
          policy_loss: 0.006420391384098265
          total_loss: 0.01482117043601142
          vf_explained_var: 0.06938512623310089
          vf_loss: 0.013191063339925474
    num_agent_steps_sampled: 161000
    num_agent_steps_trained: 161000
    num_steps_sampled: 161000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,161,5415.99,161000,-2.3867,-1.98,-3.26,238.67




Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 162000
  custom_metrics: {}
  date: 2021-10-24_14-52-52
  done: false
  episode_len_mean: 236.33
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.3632999999999935
  episode_reward_min: -3.2599999999999745
  episodes_this_iter: 5
  episodes_total: 602
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.997602166487922e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.39881921377446916
          entropy_coeff: 0.009999999999999998
          kl: 0.001953511273054313
          policy_loss: -0.016591212898492812
          total_loss: -0.0037389232880539365
          vf_explained_var: 0.13449157774448395
          vf_loss: 0.016840482296215164
    num_agent_steps_sampled: 162000
    num_agent_steps_trained: 162000
    num_steps_sampled: 162000
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,162,5471.64,162000,-2.3633,-1.97,-3.26,236.33


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 163000
  custom_metrics: {}
  date: 2021-10-24_14-53-29
  done: false
  episode_len_mean: 234.36
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.343599999999994
  episode_reward_min: -3.0699999999999785
  episodes_this_iter: 4
  episodes_total: 606
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.498801083243961e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.39423719644546507
          entropy_coeff: 0.009999999999999998
          kl: 0.0015869850506030206
          policy_loss: -0.019353175991111332
          total_loss: -0.009530979891618092
          vf_explained_var: 0.01618017815053463
          vf_loss: 0.01376457101561957
    num_agent_steps_sampled: 163000
    num_agent_steps_trained: 163000
    num_steps_sampled: 163000
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,163,5508.56,163000,-2.3436,-1.97,-3.07,234.36


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 164000
  custom_metrics: {}
  date: 2021-10-24_14-54-04
  done: false
  episode_len_mean: 233.52
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.335199999999994
  episode_reward_min: -3.0699999999999785
  episodes_this_iter: 5
  episodes_total: 611
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.494005416219805e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.6166693886121114
          entropy_coeff: 0.009999999999999998
          kl: 0.005963750944783107
          policy_loss: -0.028580114907688563
          total_loss: -0.017269080215030246
          vf_explained_var: 0.08314958214759827
          vf_loss: 0.017477726625899474
    num_agent_steps_sampled: 164000
    num_agent_steps_trained: 164000
    num_steps_sampled: 164000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,164,5543.53,164000,-2.3352,-1.97,-3.07,233.52


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 165000
  custom_metrics: {}
  date: 2021-10-24_14-54-40
  done: false
  episode_len_mean: 232.54
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.3253999999999944
  episode_reward_min: -3.0699999999999785
  episodes_this_iter: 4
  episodes_total: 615
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.494005416219805e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.43176334897677104
          entropy_coeff: 0.009999999999999998
          kl: 0.004423788125873705
          policy_loss: 0.057907785144117145
          total_loss: 0.06334247580832905
          vf_explained_var: 0.09106312692165375
          vf_loss: 0.009752322465647011
    num_agent_steps_sampled: 165000
    num_agent_steps_trained: 165000
    num_steps_sampled: 165000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,165,5578.7,165000,-2.3254,-1.97,-3.07,232.54


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 166000
  custom_metrics: {}
  date: 2021-10-24_14-55-16
  done: false
  episode_len_mean: 231.85
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.3184999999999945
  episode_reward_min: -3.0699999999999785
  episodes_this_iter: 5
  episodes_total: 620
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.7470027081099023e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.4514637400706609
          entropy_coeff: 0.009999999999999998
          kl: 0.0014119191836660624
          policy_loss: -0.009604430033100976
          total_loss: 0.0030806416438685525
          vf_explained_var: 0.12928254902362823
          vf_loss: 0.017199708128141034
    num_agent_steps_sampled: 166000
    num_agent_steps_trained: 166000
    num_steps_sampled: 166000
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,166,5615.54,166000,-2.3185,-1.97,-3.07,231.85


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 167000
  custom_metrics: {}
  date: 2021-10-24_14-55-53
  done: false
  episode_len_mean: 230.54
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.3053999999999952
  episode_reward_min: -2.859999999999983
  episodes_this_iter: 4
  episodes_total: 624
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8735013540549512e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.4128494107060962
          entropy_coeff: 0.009999999999999998
          kl: 0.0016543934365010277
          policy_loss: 0.039713896397087306
          total_loss: 0.0485853161662817
          vf_explained_var: 0.06524360924959183
          vf_loss: 0.01299990965053439
    num_agent_steps_sampled: 167000
    num_agent_steps_trained: 167000
    num_steps_sampled: 167000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,167,5652.32,167000,-2.3054,-1.97,-2.86,230.54


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 168000
  custom_metrics: {}
  date: 2021-10-24_14-56-29
  done: false
  episode_len_mean: 230.61
  episode_media: {}
  episode_reward_max: -1.9700000000000015
  episode_reward_mean: -2.306099999999995
  episode_reward_min: -2.859999999999983
  episodes_this_iter: 4
  episodes_total: 628
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.367506770274756e-18
          cur_lr: 5.000000000000001e-05
          entropy: 0.5956302987204658
          entropy_coeff: 0.009999999999999998
          kl: 0.011106099948812679
          policy_loss: -0.019848362356424332
          total_loss: -0.010830404278304841
          vf_explained_var: 0.061132822185754776
          vf_loss: 0.014974260857949654
    num_agent_steps_sampled: 168000
    num_agent_steps_trained: 168000
    num_steps_sampled: 168000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,168,5687.61,168000,-2.3061,-1.97,-2.86,230.61




Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 169000
  custom_metrics: {}
  date: 2021-10-24_14-57-25
  done: false
  episode_len_mean: 229.88
  episode_media: {}
  episode_reward_max: -1.9200000000000015
  episode_reward_mean: -2.298799999999994
  episode_reward_min: -2.859999999999983
  episodes_this_iter: 5
  episodes_total: 633
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.367506770274756e-18
          cur_lr: 5.000000000000001e-05
          entropy: 0.5731973707675934
          entropy_coeff: 0.009999999999999998
          kl: 0.004886184576435287
          policy_loss: -0.02221787323554357
          total_loss: -0.009468668947617213
          vf_explained_var: 0.12080489099025726
          vf_loss: 0.018481178126401373
    num_agent_steps_sampled: 169000
    num_agent_steps_trained: 169000
    num_steps_sampled: 169000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,169,5743.67,169000,-2.2988,-1.92,-2.86,229.88


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 170000
  custom_metrics: {}
  date: 2021-10-24_14-58-01
  done: false
  episode_len_mean: 229.53
  episode_media: {}
  episode_reward_max: -1.9200000000000015
  episode_reward_mean: -2.2952999999999943
  episode_reward_min: -2.859999999999983
  episodes_this_iter: 4
  episodes_total: 637
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.683753385137378e-18
          cur_lr: 5.000000000000001e-05
          entropy: 0.5548675772216585
          entropy_coeff: 0.009999999999999998
          kl: 0.028157844054013264
          policy_loss: 0.04039472606447008
          total_loss: 0.04614023110932774
          vf_explained_var: 0.09956896305084229
          vf_loss: 0.011294180175496472
    num_agent_steps_sampled: 170000
    num_agent_steps_trained: 170000
    num_steps_sampled: 170000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,170,5780.3,170000,-2.2953,-1.92,-2.86,229.53


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 171000
  custom_metrics: {}
  date: 2021-10-24_14-58-34
  done: false
  episode_len_mean: 230.41
  episode_media: {}
  episode_reward_max: -1.9200000000000015
  episode_reward_mean: -2.3040999999999947
  episode_reward_min: -2.859999999999983
  episodes_this_iter: 4
  episodes_total: 641
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.025630077706068e-18
          cur_lr: 5.000000000000001e-05
          entropy: 0.8340502076678806
          entropy_coeff: 0.009999999999999998
          kl: 0.0053441980481781885
          policy_loss: 0.023260335955354903
          total_loss: 0.02918536571992768
          vf_explained_var: 0.07544578611850739
          vf_loss: 0.014265528331614203
    num_agent_steps_sampled: 171000
    num_agent_steps_trained: 171000
    num_steps_sampled: 171000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,171,5812.49,171000,-2.3041,-1.92,-2.86,230.41


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 172000
  custom_metrics: {}
  date: 2021-10-24_14-59-00
  done: false
  episode_len_mean: 233.26
  episode_media: {}
  episode_reward_max: -1.9200000000000015
  episode_reward_mean: -2.332599999999994
  episode_reward_min: -4.479999999999949
  episodes_this_iter: 3
  episodes_total: 644
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.025630077706068e-18
          cur_lr: 5.000000000000001e-05
          entropy: 1.0298634257581498
          entropy_coeff: 0.009999999999999998
          kl: 0.012423661266738703
          policy_loss: 0.057312012877729204
          total_loss: 0.05745740450090832
          vf_explained_var: 0.019467923790216446
          vf_loss: 0.010444023914250365
    num_agent_steps_sampled: 172000
    num_agent_steps_trained: 172000
    num_steps_sampled: 172000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,172,5838.47,172000,-2.3326,-1.92,-4.48,233.26


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 173000
  custom_metrics: {}
  date: 2021-10-24_14-59-23
  done: false
  episode_len_mean: 236.91
  episode_media: {}
  episode_reward_max: -1.9200000000000015
  episode_reward_mean: -2.3690999999999933
  episode_reward_min: -4.989999999999938
  episodes_this_iter: 3
  episodes_total: 647
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.025630077706068e-18
          cur_lr: 5.000000000000001e-05
          entropy: 1.2084165652592977
          entropy_coeff: 0.009999999999999998
          kl: 0.03989609102431883
          policy_loss: 0.05705099950234095
          total_loss: 0.055312181429730524
          vf_explained_var: 0.053940825164318085
          vf_loss: 0.010345348561693552
    num_agent_steps_sampled: 173000
    num_agent_steps_trained: 173000
    num_steps_sampled: 173000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,173,5861.87,173000,-2.3691,-1.92,-4.99,236.91


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 174000
  custom_metrics: {}
  date: 2021-10-24_14-59-46
  done: false
  episode_len_mean: 240.36
  episode_media: {}
  episode_reward_max: -1.9200000000000015
  episode_reward_mean: -2.403599999999993
  episode_reward_min: -4.989999999999938
  episodes_this_iter: 3
  episodes_total: 650
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0538445116559108e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.2931940151585473
          entropy_coeff: 0.009999999999999998
          kl: 0.008207870649876112
          policy_loss: 0.06230967856115765
          total_loss: 0.0589980750448174
          vf_explained_var: 0.12491179257631302
          vf_loss: 0.00962033637592362
    num_agent_steps_sampled: 174000
    num_agent_steps_trained: 174000
    num_steps_sampled: 174000
    num_steps_trained: 17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,174,5884.99,174000,-2.4036,-1.92,-4.99,240.36


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 175000
  custom_metrics: {}
  date: 2021-10-24_15-00-11
  done: false
  episode_len_mean: 243.37
  episode_media: {}
  episode_reward_max: -1.9200000000000015
  episode_reward_mean: -2.4336999999999924
  episode_reward_min: -4.989999999999938
  episodes_this_iter: 2
  episodes_total: 652
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0538445116559108e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.6487786418861814
          entropy_coeff: 0.009999999999999998
          kl: 0.025261608314693578
          policy_loss: -0.10307237075434791
          total_loss: -0.10895534157752991
          vf_explained_var: -0.061349883675575256
          vf_loss: 0.0106048118976307
    num_agent_steps_sampled: 175000
    num_agent_steps_trained: 175000
    num_steps_sampled: 175000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,175,5910.16,175000,-2.4337,-1.92,-4.99,243.37


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 176000
  custom_metrics: {}
  date: 2021-10-24_15-00-41
  done: false
  episode_len_mean: 245.36
  episode_media: {}
  episode_reward_max: -1.9200000000000015
  episode_reward_mean: -2.453599999999992
  episode_reward_min: -4.989999999999938
  episodes_this_iter: 4
  episodes_total: 656
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.580766767483865e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.9863228473398421
          entropy_coeff: 0.009999999999999998
          kl: 0.006422297283000174
          policy_loss: 0.038264256260461274
          total_loss: 0.040565128748615585
          vf_explained_var: 0.31334421038627625
          vf_loss: 0.012164098117500544
    num_agent_steps_sampled: 176000
    num_agent_steps_trained: 176000
    num_steps_sampled: 176000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,176,5939.51,176000,-2.4536,-1.92,-4.99,245.36


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 177000
  custom_metrics: {}
  date: 2021-10-24_15-01-05
  done: false
  episode_len_mean: 248.9
  episode_media: {}
  episode_reward_max: -1.9200000000000015
  episode_reward_mean: -2.488999999999991
  episode_reward_min: -4.989999999999938
  episodes_this_iter: 3
  episodes_total: 659
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.580766767483865e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.156460577911801
          entropy_coeff: 0.009999999999999998
          kl: 0.009529428067979968
          policy_loss: 0.06248801483048333
          total_loss: 0.059719882822699015
          vf_explained_var: 0.45721766352653503
          vf_loss: 0.008796473743212926
    num_agent_steps_sampled: 177000
    num_agent_steps_trained: 177000
    num_steps_sampled: 177000
    num_steps_trained: 17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,177,5963.73,177000,-2.489,-1.92,-4.99,248.9




Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 178000
  custom_metrics: {}
  date: 2021-10-24_15-01-50
  done: false
  episode_len_mean: 250.79
  episode_media: {}
  episode_reward_max: -1.9200000000000015
  episode_reward_mean: -2.5078999999999905
  episode_reward_min: -4.989999999999938
  episodes_this_iter: 3
  episodes_total: 662
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.580766767483865e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.1004619518915812
          entropy_coeff: 0.009999999999999998
          kl: 0.00911628600601454
          policy_loss: 0.04809270285897785
          total_loss: 0.048303976986143325
          vf_explained_var: 0.1109803095459938
          vf_loss: 0.011215894038064613
    num_agent_steps_sampled: 178000
    num_agent_steps_trained: 178000
    num_steps_sampled: 178000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,178,6008.94,178000,-2.5079,-1.92,-4.99,250.79


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 179000
  custom_metrics: {}
  date: 2021-10-24_15-02-19
  done: false
  episode_len_mean: 252.62
  episode_media: {}
  episode_reward_max: -1.9200000000000015
  episode_reward_mean: -2.52619999999999
  episode_reward_min: -4.989999999999938
  episodes_this_iter: 3
  episodes_total: 665
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.580766767483865e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.9468972272343106
          entropy_coeff: 0.009999999999999998
          kl: 0.009228495346941814
          policy_loss: -0.034942463371488784
          total_loss: -0.03342751496367984
          vf_explained_var: 0.04455925524234772
          vf_loss: 0.010983920346997264
    num_agent_steps_sampled: 179000
    num_agent_steps_trained: 179000
    num_steps_sampled: 179000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,179,6037.94,179000,-2.5262,-1.92,-4.99,252.62


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 180000
  custom_metrics: {}
  date: 2021-10-24_15-02-53
  done: false
  episode_len_mean: 253.68
  episode_media: {}
  episode_reward_max: -1.9200000000000015
  episode_reward_mean: -2.5367999999999897
  episode_reward_min: -4.989999999999938
  episodes_this_iter: 4
  episodes_total: 669
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.580766767483865e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.7998042106628418
          entropy_coeff: 0.009999999999999998
          kl: 0.005696324754767327
          policy_loss: 0.004230546289020114
          total_loss: 0.010732752250300513
          vf_explained_var: 0.1574195772409439
          vf_loss: 0.014500248639119997
    num_agent_steps_sampled: 180000
    num_agent_steps_trained: 180000
    num_steps_sampled: 180000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,180,6071.78,180000,-2.5368,-1.92,-4.99,253.68


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 181000
  custom_metrics: {}
  date: 2021-10-24_15-03-19
  done: false
  episode_len_mean: 256.78
  episode_media: {}
  episode_reward_max: -1.9200000000000015
  episode_reward_mean: -2.567799999999989
  episode_reward_min: -4.989999999999938
  episodes_this_iter: 3
  episodes_total: 672
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.580766767483865e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.073092422220442
          entropy_coeff: 0.009999999999999998
          kl: 0.006481282386308212
          policy_loss: 0.015570561918947432
          total_loss: 0.01319786583383878
          vf_explained_var: 0.2794027626514435
          vf_loss: 0.008358229917171734
    num_agent_steps_sampled: 181000
    num_agent_steps_trained: 181000
    num_steps_sampled: 181000
    num_steps_trained: 18

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,181,6097.88,181000,-2.5678,-1.92,-4.99,256.78


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 182000
  custom_metrics: {}
  date: 2021-10-24_15-03-43
  done: false
  episode_len_mean: 260.23
  episode_media: {}
  episode_reward_max: -1.9200000000000015
  episode_reward_mean: -2.602299999999988
  episode_reward_min: -5.259999999999932
  episodes_this_iter: 3
  episodes_total: 675
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.580766767483865e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.19745411740409
          entropy_coeff: 0.009999999999999998
          kl: 0.016703600900403615
          policy_loss: 0.026434444470538034
          total_loss: 0.027273283898830415
          vf_explained_var: 0.044270604848861694
          vf_loss: 0.012813380102994012
    num_agent_steps_sampled: 182000
    num_agent_steps_trained: 182000
    num_steps_sampled: 182000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,182,6121.36,182000,-2.6023,-1.92,-5.26,260.23


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 183000
  custom_metrics: {}
  date: 2021-10-24_15-04-12
  done: false
  episode_len_mean: 262.35
  episode_media: {}
  episode_reward_max: -1.9200000000000015
  episode_reward_mean: -2.6234999999999875
  episode_reward_min: -5.259999999999932
  episodes_this_iter: 4
  episodes_total: 679
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.580766767483865e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.011239007446501
          entropy_coeff: 0.009999999999999998
          kl: 0.006971406984847701
          policy_loss: 0.01137792795068688
          total_loss: 0.017780493034256828
          vf_explained_var: 0.1599670946598053
          vf_loss: 0.016514954674575064
    num_agent_steps_sampled: 183000
    num_agent_steps_trained: 183000
    num_steps_sampled: 183000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,183,6151.02,183000,-2.6235,-1.92,-5.26,262.35


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 184000
  custom_metrics: {}
  date: 2021-10-24_15-04-41
  done: false
  episode_len_mean: 264.15
  episode_media: {}
  episode_reward_max: -1.9200000000000015
  episode_reward_mean: -2.6414999999999873
  episode_reward_min: -5.259999999999932
  episodes_this_iter: 3
  episodes_total: 682
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.580766767483865e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.0311132722430758
          entropy_coeff: 0.009999999999999998
          kl: 0.008103045732553306
          policy_loss: 0.018074104769362345
          total_loss: 0.01819708388712671
          vf_explained_var: 0.2057335525751114
          vf_loss: 0.010434110488535629
    num_agent_steps_sampled: 184000
    num_agent_steps_trained: 184000
    num_steps_sampled: 184000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,184,6179.4,184000,-2.6415,-1.92,-5.26,264.15


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 185000
  custom_metrics: {}
  date: 2021-10-24_15-05-10
  done: false
  episode_len_mean: 266.03
  episode_media: {}
  episode_reward_max: -1.9200000000000015
  episode_reward_mean: -2.660299999999987
  episode_reward_min: -5.259999999999932
  episodes_this_iter: 3
  episodes_total: 685
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.580766767483865e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.0028484827942319
          entropy_coeff: 0.009999999999999998
          kl: 0.007069269274547531
          policy_loss: -0.10021314041482078
          total_loss: -0.09550734787351556
          vf_explained_var: 0.23436233401298523
          vf_loss: 0.014734274759474728
    num_agent_steps_sampled: 185000
    num_agent_steps_trained: 185000
    num_steps_sampled: 185000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,185,6208.39,185000,-2.6603,-1.92,-5.26,266.03


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 186000
  custom_metrics: {}
  date: 2021-10-24_15-05-41
  done: false
  episode_len_mean: 267.84
  episode_media: {}
  episode_reward_max: -1.9200000000000015
  episode_reward_mean: -2.678399999999986
  episode_reward_min: -5.259999999999932
  episodes_this_iter: 4
  episodes_total: 689
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.580766767483865e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.7722541318999396
          entropy_coeff: 0.009999999999999998
          kl: 0.005213172922696272
          policy_loss: 0.001293000289135509
          total_loss: 0.005000645625922415
          vf_explained_var: 0.23144526779651642
          vf_loss: 0.011430185132970413
    num_agent_steps_sampled: 186000
    num_agent_steps_trained: 186000
    num_steps_sampled: 186000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,186,6239.88,186000,-2.6784,-1.92,-5.26,267.84




Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 187000
  custom_metrics: {}
  date: 2021-10-24_15-06-28
  done: false
  episode_len_mean: 270.83
  episode_media: {}
  episode_reward_max: -1.9200000000000015
  episode_reward_mean: -2.708299999999986
  episode_reward_min: -5.259999999999932
  episodes_this_iter: 4
  episodes_total: 693
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.580766767483865e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.1835541798008813
          entropy_coeff: 0.009999999999999998
          kl: 0.011988522111465411
          policy_loss: -0.0008499574330117967
          total_loss: 0.0014804030458132427
          vf_explained_var: 0.30632245540618896
          vf_loss: 0.014165900813208687
    num_agent_steps_sampled: 187000
    num_agent_steps_trained: 187000
    num_steps_sampled: 187000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,187,6286.5,187000,-2.7083,-1.92,-5.26,270.83


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 188000
  custom_metrics: {}
  date: 2021-10-24_15-06-59
  done: false
  episode_len_mean: 272.15
  episode_media: {}
  episode_reward_max: -1.9200000000000015
  episode_reward_mean: -2.7214999999999865
  episode_reward_min: -5.259999999999932
  episodes_this_iter: 3
  episodes_total: 696
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.580766767483865e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.9122961421807607
          entropy_coeff: 0.009999999999999998
          kl: 0.006699995721525593
          policy_loss: 0.03034875334964858
          total_loss: 0.03019070534242524
          vf_explained_var: 0.2895183265209198
          vf_loss: 0.00896491601338817
    num_agent_steps_sampled: 188000
    num_agent_steps_trained: 188000
    num_steps_sampled: 188000
    num_steps_trained: 18

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,188,6317.23,188000,-2.7215,-1.92,-5.26,272.15


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 189000
  custom_metrics: {}
  date: 2021-10-24_15-07-29
  done: false
  episode_len_mean: 274.58
  episode_media: {}
  episode_reward_max: -1.9200000000000015
  episode_reward_mean: -2.745799999999986
  episode_reward_min: -5.259999999999932
  episodes_this_iter: 4
  episodes_total: 700
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.580766767483865e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.9062743888960945
          entropy_coeff: 0.009999999999999998
          kl: 0.006438529541372123
          policy_loss: 0.017561052905188668
          total_loss: 0.021695766184065077
          vf_explained_var: 0.21062467992305756
          vf_loss: 0.013197456217474408
    num_agent_steps_sampled: 189000
    num_agent_steps_trained: 189000
    num_steps_sampled: 189000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,189,6347.33,189000,-2.7458,-1.92,-5.26,274.58


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 190000
  custom_metrics: {}
  date: 2021-10-24_15-08-00
  done: false
  episode_len_mean: 275.85
  episode_media: {}
  episode_reward_max: -1.9200000000000015
  episode_reward_mean: -2.758499999999985
  episode_reward_min: -5.259999999999932
  episodes_this_iter: 3
  episodes_total: 703
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.580766767483865e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.8505037976635828
          entropy_coeff: 0.009999999999999998
          kl: 0.004321634074443952
          policy_loss: -0.09724817822376887
          total_loss: -0.09291625883844164
          vf_explained_var: 0.1196347251534462
          vf_loss: 0.012836957991951042
    num_agent_steps_sampled: 190000
    num_agent_steps_trained: 190000
    num_steps_sampled: 190000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,190,6378.91,190000,-2.7585,-1.92,-5.26,275.85


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 191000
  custom_metrics: {}
  date: 2021-10-24_15-08-30
  done: false
  episode_len_mean: 277.86
  episode_media: {}
  episode_reward_max: -1.9200000000000015
  episode_reward_mean: -2.7785999999999844
  episode_reward_min: -5.259999999999932
  episodes_this_iter: 4
  episodes_total: 707
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.903833837419325e-18
          cur_lr: 5.000000000000001e-05
          entropy: 0.9347180055247413
          entropy_coeff: 0.009999999999999998
          kl: 0.010795824569188678
          policy_loss: 0.013954985638459523
          total_loss: 0.018269987859659723
          vf_explained_var: 0.10139656811952591
          vf_loss: 0.013662186016639074
    num_agent_steps_sampled: 191000
    num_agent_steps_trained: 191000
    num_steps_sampled: 191000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,191,6408.64,191000,-2.7786,-1.92,-5.26,277.86


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 192000
  custom_metrics: {}
  date: 2021-10-24_15-09-00
  done: false
  episode_len_mean: 279.89
  episode_media: {}
  episode_reward_max: -1.9200000000000015
  episode_reward_mean: -2.7988999999999833
  episode_reward_min: -5.259999999999932
  episodes_this_iter: 4
  episodes_total: 711
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.903833837419325e-18
          cur_lr: 5.000000000000001e-05
          entropy: 0.9894795795281728
          entropy_coeff: 0.009999999999999998
          kl: 0.00810265156171014
          policy_loss: 0.016501261666417123
          total_loss: 0.022351809798015487
          vf_explained_var: 0.06857814639806747
          vf_loss: 0.01574534263668789
    num_agent_steps_sampled: 192000
    num_agent_steps_trained: 192000
    num_steps_sampled: 192000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,192,6438.31,192000,-2.7989,-1.92,-5.26,279.89


Result for PPO_my_env_4b4a2_00000:
  agent_timesteps_total: 193000
  custom_metrics: {}
  date: 2021-10-24_15-09-31
  done: false
  episode_len_mean: 281.32
  episode_media: {}
  episode_reward_max: -1.9200000000000015
  episode_reward_mean: -2.813199999999984
  episode_reward_min: -5.259999999999932
  episodes_this_iter: 3
  episodes_total: 714
  experiment_id: f0df27c57fd44ab2b393235f81519793
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.903833837419325e-18
          cur_lr: 5.000000000000001e-05
          entropy: 0.6880873872174157
          entropy_coeff: 0.009999999999999998
          kl: 0.01242304261419586
          policy_loss: -0.030038443124956554
          total_loss: -0.02522884060939153
          vf_explained_var: 0.051288966089487076
          vf_loss: 0.01169047321503361
    num_agent_steps_sampled: 193000
    num_agent_steps_trained: 193000
    num_steps_sampled: 193000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4b4a2_00000,RUNNING,192.168.3.5:380543,193,6469.25,193000,-2.8132,-1.92,-5.26,281.32
