In [1]:
import torch 
from torch import nn

import ray
from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

#from models import VisualEncoder
from train import *
from wrappers_2 import *



In [2]:
class VisualEncoder(nn.Module):
    """Convolutional image encoder that flattens its final feature map.

    The network is a stack of five ``Conv2d`` + ``ReLU`` pairs followed by
    ``Flatten``. It is stored under the attribute ``cnn`` as an
    ``nn.Sequential`` so that parameter names are ``cnn.<index>.weight`` /
    ``cnn.<index>.bias``.
    """

    def __init__(self):
        super().__init__()

        # One row per convolution: (in_channels, out_channels, kernel_size, stride).
        conv_specs = (
            (3, 32, 8, 4),
            (32, 64, 4, 2),
            (64, 64, 3, 1),
            (64, 64, 3, 1),
            (64, 512, 2, 1),
        )

        layers = []
        for in_ch, out_ch, kernel, step in conv_specs:
            layers.append(nn.Conv2d(in_ch, out_ch, kernel_size=kernel, stride=step, padding=0))
            layers.append(nn.ReLU())
        # Collapse (channels, height, width) into a single feature axis per sample.
        layers.append(nn.Flatten())

        self.cnn = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` (a channels-first image batch with 3 channels) through the stack."""
        features = self.cnn(x)
        return features

In [3]:
class MyModelClass(TorchModelV2, nn.Module):
    """RLlib Torch model: a pretrained ``VisualEncoder`` feeding linear policy and value heads.

    ``forward`` returns the action-head output and caches the value-head output,
    which ``value_function`` then returns.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        """Build the encoder and both heads and load the pretrained encoder weights.

        Args:
            obs_space: Observation space, forwarded to ``TorchModelV2.__init__``.
            action_space: Action space; ``action_space.n`` sets the action-head width.
            num_outputs: Forwarded to ``TorchModelV2.__init__``.
            model_config: Forwarded to ``TorchModelV2.__init__``.
            name: Forwarded to ``TorchModelV2.__init__``.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)
        # Must equal the flattened encoder output size. The last conv has 512
        # channels, so this assumes the feature map is 1x1 at that point
        # -- TODO confirm against the observation resolution.
        features_dim = 512
        self.encoder = VisualEncoder()
        # Weights are deserialised onto the CPU first; the modules are moved to
        # the GPU below when one is available.
        # NOTE(review): absolute path, and the "weigths" spelling presumably
        # matches the file on disk -- confirm before renaming.
        self.encoder.load_state_dict(
            torch.load("/IGLU-Minecraft/models/AnnaCNN/encoder_weigths.pth", map_location=torch.device('cpu'))
        )
        self.action_head = nn.Linear(features_dim, action_space.n)
        self.value_head = nn.Linear(features_dim, 1)
        # Filled in by forward(); read by value_function().
        self.last_value = None

        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            self.encoder.cuda()
            self.action_head.cuda()
            self.value_head.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Compute action-head outputs and cache the value estimate.

        Args:
            input_dict: Mapping whose ``'obs'`` entry is a 4-D tensor; the last
                axis is moved to position 1 before the encoder is applied
                (assumes channels-last images with 0-255 values -- TODO confirm).
            state: Returned unchanged.
            seq_lens: Unused.

        Returns:
            Tuple of (action-head output, ``state``).
        """
        obs = input_dict['obs'].permute(0, 3, 1, 2).float() / 255.0
        # Tensor.cuda()/Tensor.to() return a new tensor rather than moving the
        # receiver in place, so the result must be assigned. Using the encoder's
        # own parameter device keeps input and weights together wherever the
        # model ends up being placed.
        obs = obs.to(next(self.encoder.parameters()).device)

        features = self.encoder(obs)
        action = self.action_head(features)
        # Drop the trailing size-1 axis so the value is one scalar per sample.
        self.last_value = self.value_head(features).squeeze(1)
        return action, state

    @override(TorchModelV2)
    def value_function(self):
        """Return the value estimate cached by the most recent ``forward`` call.

        Raises:
            AssertionError: If ``forward`` has not been called yet.
        """
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

In [4]:
# Register MyModelClass under the string key "my_torch_model" so a trainer
# config can select it by name through "custom_model".
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [5]:
import os
# Order CUDA devices by PCI bus id and expose only device "0" to this process.
# NOTE(review): these variables presumably need to be set before CUDA is first
# initialised in this process to take effect -- confirm, since torch is already
# imported in an earlier cell.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0"

class RewardWrapper(gym.RewardWrapper):
    """Reward wrapper that turns every zero reward into a penalty of -0.1.

    Non-zero rewards are passed through unchanged.
    """

    def __init__(self, env):
        super().__init__(env)

    def reward(self, rew):
        """Return -0.1 when ``rew`` equals 0, otherwise ``rew`` itself."""
        return -0.1 if rew == 0 else rew
    
def env_creator(env_config):
    """Build the wrapped 'IGLUSilentBuilder-v0' environment.

    ``env_config`` is accepted but not used. The base environment is created
    with ``max_steps=1000`` and restricted to the task preset ``['C8']``, then
    wrapped, innermost to outermost, in ``PovOnlyWrapper``, ``SelectAndPlace``,
    ``Discretization`` (with ``flat_action_space('human-level')``) and
    ``RewardWrapper``.
    """
    base_env = gym.make('IGLUSilentBuilder-v0', max_steps=1000)
    base_env.update_taskset(TaskSet(preset=['C8']))
    # Arguments are evaluated left to right, so the inner wrappers are built
    # before flat_action_space() is called, as in a step-by-step chain.
    discretized = Discretization(
        SelectAndPlace(PovOnlyWrapper(base_env)),
        flat_action_space('human-level'),
    )
    return RewardWrapper(discretized)

from ray.tune.registry import register_env
# Register the env_creator factory under the string key "my_env" so a trainer
# config can refer to the environment by name.
register_env("my_env", env_creator)

from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

In [None]:
from ray.tune.integration.wandb import WandbLogger

# Launch a Tune run of PPOTrainer on the registered "my_env" environment using
# the registered "my_torch_model" custom model, logging through WandbLogger.
# NOTE(review): no stop criterion is passed to tune.run, so the run presumably
# continues until it is interrupted -- confirm this is intended.
analysis = tune.run(PPOTrainer, 
         config={
             "env": "my_env", 
             "framework": "torch",
             # Resource settings: one GPU and one rollout worker.
             "num_gpus": 1,
             "num_workers": 1,
             # PPO hyperparameters (RLlib config keys).
             "sgd_minibatch_size": 256,
             "clip_param": 0.2,
             "entropy_coeff": 0.01,
             "lambda": 0.95,
             "train_batch_size": 1000,
             "model": {
                    # Specify our custom model from above.
                    "custom_model": "my_torch_model",
                    # Extra kwargs to be passed to your model's c'tor.
                    "custom_model_config": {},
              },
             # Settings read by the W&B logger: target project and run name.
             "logger_config": {
                  "wandb": {
                      "project": "IGLU-Minecraft",
                      "name": "PPO C8 pretrained (AnnaCNN) (3 noops after placement and reward shaping)"
                  }
              }

        },
        loggers=[WandbLogger])

2021-10-09 21:38:54,838	INFO wandb.py:170 -- Already logged into W&B.
2021-10-09 21:38:54,853	ERROR syncer.py:72 -- Log sync requires rsync to be installed.


Trial name,status,loc
PPO_my_env_4d34e_00000,RUNNING,


[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.4 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2m[36m(pid=258828)[0m 2021-10-09 21:38:58,243	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=258828)[0m 2021-10-09 21:38:58,243	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 1000
  custom_metrics: {}
  date: 2021-10-09_21-40-05
  done: false
  episode_len_mean: 484.0
  episode_media: {}
  episode_reward_max: -45.20000000000037
  episode_reward_mean: -48.40000000000042
  episode_reward_min: -51.60000000000046
  episodes_this_iter: 2
  episodes_total: 2
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6374062498410542
          entropy_coeff: 0.009999999999999998
          kl: 0.011904515929222962
          policy_loss: -0.0018852401110861037
          total_loss: 0.21260175696677633
          vf_explained_var: 0.5787085890769958
          vf_loss: 0.2284801604019271
    num_agent_steps_sampled: 1000
    num_agent_steps_trained: 1000
    num_steps_sampled: 1000
    num_steps_trained: 1000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,1,61.9216,1000,-48.4,-45.2,-51.6,484


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 2000
  custom_metrics: {}
  date: 2021-10-09_21-40-20
  done: false
  episode_len_mean: 517.0
  episode_media: {}
  episode_reward_max: -45.20000000000037
  episode_reward_mean: -51.700000000000465
  episode_reward_min: -58.30000000000056
  episodes_this_iter: 1
  episodes_total: 3
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6620309193929037
          entropy_coeff: 0.009999999999999998
          kl: 0.01803415500471033
          policy_loss: 0.019109862463341818
          total_loss: 0.23480405290093687
          vf_explained_var: 0.18970385193824768
          vf_loss: 0.228707674311267
    num_agent_steps_sampled: 2000
    num_agent_steps_trained: 2000
    num_steps_sampled: 2000
    num_steps_trained: 2000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,2,76.0614,2000,-51.7,-45.2,-58.3,517


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 3000
  custom_metrics: {}
  date: 2021-10-09_21-40-33
  done: false
  episode_len_mean: 534.6
  episode_media: {}
  episode_reward_max: -45.20000000000037
  episode_reward_mean: -53.46000000000049
  episode_reward_min: -58.700000000000564
  episodes_this_iter: 2
  episodes_total: 5
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.5666171855396696
          entropy_coeff: 0.009999999999999998
          kl: 0.013362808227218852
          policy_loss: 0.019572347692317433
          total_loss: 0.2517342766539918
          vf_explained_var: 0.4106024503707886
          vf_loss: 0.24515554122626781
    num_agent_steps_sampled: 3000
    num_agent_steps_trained: 3000
    num_steps_sampled: 3000
    num_steps_trained: 3000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,3,89.8448,3000,-53.46,-45.2,-58.7,534.6


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 4000
  custom_metrics: {}
  date: 2021-10-09_21-40-48
  done: false
  episode_len_mean: 538.4285714285714
  episode_media: {}
  episode_reward_max: -45.20000000000037
  episode_reward_mean: -53.842857142857646
  episode_reward_min: -58.700000000000564
  episodes_this_iter: 2
  episodes_total: 7
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6130048897531297
          entropy_coeff: 0.009999999999999998
          kl: 0.011417587975743496
          policy_loss: -0.0015001221663422054
          total_loss: 0.2973063816626867
          vf_explained_var: 0.23403385281562805
          vf_loss: 0.31265303124156263
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_steps_sampled: 4000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,4,104.785,4000,-53.8429,-45.2,-58.7,538.429


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 5000
  custom_metrics: {}
  date: 2021-10-09_21-41-03
  done: false
  episode_len_mean: 540.2222222222222
  episode_media: {}
  episode_reward_max: -45.20000000000037
  episode_reward_mean: -54.02222222222272
  episode_reward_min: -58.700000000000564
  episodes_this_iter: 2
  episodes_total: 9
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7277319894896612
          entropy_coeff: 0.009999999999999998
          kl: 0.011258051009762484
          policy_loss: 0.12729045020209417
          total_loss: 0.3761722773313522
          vf_explained_var: 0.28124526143074036
          vf_loss: 0.2639075417899423
    num_agent_steps_sampled: 5000
    num_agent_steps_trained: 5000
    num_steps_sampled: 5000
    num_steps_trained: 5000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,5,118.981,5000,-54.0222,-45.2,-58.7,540.222


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 6000
  custom_metrics: {}
  date: 2021-10-09_21-41-20
  done: false
  episode_len_mean: 527.4545454545455
  episode_media: {}
  episode_reward_max: -45.20000000000037
  episode_reward_mean: -52.745454545455026
  episode_reward_min: -58.700000000000564
  episodes_this_iter: 2
  episodes_total: 11
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.769703167014652
          entropy_coeff: 0.009999999999999998
          kl: 0.011410914931694964
          policy_loss: -0.010947428312566546
          total_loss: 0.24913705297642283
          vf_explained_var: 0.33329471945762634
          vf_loss: 0.27549933230297435
    num_agent_steps_sampled: 6000
    num_agent_steps_trained: 6000
    num_steps_sampled: 6000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,6,136.409,6000,-52.7455,-45.2,-58.7,527.455


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 7000
  custom_metrics: {}
  date: 2021-10-09_21-41-35
  done: false
  episode_len_mean: 522.0
  episode_media: {}
  episode_reward_max: -45.20000000000037
  episode_reward_mean: -52.200000000000465
  episode_reward_min: -58.700000000000564
  episodes_this_iter: 2
  episodes_total: 13
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6403181711832682
          entropy_coeff: 0.009999999999999998
          kl: 0.007259975367006098
          policy_loss: 0.06299657391177284
          total_loss: 0.3950923330254025
          vf_explained_var: 0.41784006357192993
          vf_loss: 0.34704694603052405
    num_agent_steps_sampled: 7000
    num_agent_steps_trained: 7000
    num_steps_sampled: 7000
    num_steps_trained: 7000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,7,151.678,7000,-52.2,-45.2,-58.7,522


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 8000
  custom_metrics: {}
  date: 2021-10-09_21-41-48
  done: false
  episode_len_mean: 527.5333333333333
  episode_media: {}
  episode_reward_max: -45.20000000000037
  episode_reward_mean: -52.753333333333806
  episode_reward_min: -58.700000000000564
  episodes_this_iter: 2
  episodes_total: 15
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.704326374000973
          entropy_coeff: 0.009999999999999998
          kl: 0.011227955352923816
          policy_loss: 0.09704650574260287
          total_loss: 0.43954161124096974
          vf_explained_var: 0.3781976103782654
          vf_loss: 0.35729277847955626
    num_agent_steps_sampled: 8000
    num_agent_steps_trained: 8000
    num_steps_sampled: 8000
    num_steps_trained: 800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,8,164.651,8000,-52.7533,-45.2,-58.7,527.533


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 9000
  custom_metrics: {}
  date: 2021-10-09_21-42-02
  done: false
  episode_len_mean: 532.125
  episode_media: {}
  episode_reward_max: -45.20000000000037
  episode_reward_mean: -53.21250000000049
  episode_reward_min: -60.100000000000584
  episodes_this_iter: 1
  episodes_total: 16
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7570066266589695
          entropy_coeff: 0.009999999999999998
          kl: 0.0075257509468616805
          policy_loss: -0.06456692959699366
          total_loss: 0.30471791649858154
          vf_explained_var: 0.3997000455856323
          vf_loss: 0.3853497669307722
    num_agent_steps_sampled: 9000
    num_agent_steps_trained: 9000
    num_steps_sampled: 9000
    num_steps_trained: 9000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,9,178.209,9000,-53.2125,-45.2,-60.1,532.125


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 10000
  custom_metrics: {}
  date: 2021-10-09_21-42-14
  done: false
  episode_len_mean: 536.6111111111111
  episode_media: {}
  episode_reward_max: -45.20000000000037
  episode_reward_mean: -53.66111111111161
  episode_reward_min: -62.300000000000615
  episodes_this_iter: 2
  episodes_total: 18
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6769349733988443
          entropy_coeff: 0.009999999999999998
          kl: 0.009709388017812633
          policy_loss: -0.09002938369909923
          total_loss: 0.7142201367351744
          vf_explained_var: 0.3765750825405121
          vf_loss: 0.8190769890116321
    num_agent_steps_sampled: 10000
    num_agent_steps_trained: 10000
    num_steps_sampled: 10000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,10,190.817,10000,-53.6611,-45.2,-62.3,536.611


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 11000
  custom_metrics: {}
  date: 2021-10-09_21-42-27
  done: false
  episode_len_mean: 543.4
  episode_media: {}
  episode_reward_max: -45.20000000000037
  episode_reward_mean: -54.34000000000051
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 2
  episodes_total: 20
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.4617929763264126
          entropy_coeff: 0.009999999999999998
          kl: 0.003940579199421753
          policy_loss: 0.09973888910479016
          total_loss: 0.5756603595283296
          vf_explained_var: 0.32582178711891174
          vf_loss: 0.4897512865977155
    num_agent_steps_sampled: 11000
    num_agent_steps_trained: 11000
    num_steps_sampled: 11000
    num_steps_trained: 11000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,11,203.43,11000,-54.34,-45.2,-64,543.4


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 12000
  custom_metrics: {}
  date: 2021-10-09_21-42-39
  done: false
  episode_len_mean: 545.3181818181819
  episode_media: {}
  episode_reward_max: -45.20000000000037
  episode_reward_mean: -54.53181818181869
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 2
  episodes_total: 22
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6589543210135567
          entropy_coeff: 0.009999999999999998
          kl: 0.025686176814379753
          policy_loss: 0.09353899508714676
          total_loss: 0.5687202600969209
          vf_explained_var: 0.37338143587112427
          vf_loss: 0.4892021916496257
    num_agent_steps_sampled: 12000
    num_agent_steps_trained: 12000
    num_steps_sampled: 12000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,12,214.87,12000,-54.5318,-45.2,-64,545.318


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 13000
  custom_metrics: {}
  date: 2021-10-09_21-42-52
  done: false
  episode_len_mean: 547.9130434782609
  episode_media: {}
  episode_reward_max: -45.20000000000037
  episode_reward_mean: -54.7913043478266
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 1
  episodes_total: 23
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 1.9412229710155062
          entropy_coeff: 0.009999999999999998
          kl: 0.07891329814396315
          policy_loss: 0.11812745134035746
          total_loss: 0.2619482696470287
          vf_explained_var: 0.5541654229164124
          vf_loss: 0.1513960467858447
    num_agent_steps_sampled: 13000
    num_agent_steps_trained: 13000
    num_steps_sampled: 13000
    num_steps_trained: 13000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,13,228.336,13000,-54.7913,-45.2,-64,547.913


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 14000
  custom_metrics: {}
  date: 2021-10-09_21-43-08
  done: false
  episode_len_mean: 542.96
  episode_media: {}
  episode_reward_max: -45.20000000000037
  episode_reward_mean: -54.296000000000504
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 2
  episodes_total: 25
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2250000000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.8534856849246555
          entropy_coeff: 0.009999999999999998
          kl: 0.020184519999214008
          policy_loss: -0.16709880199697283
          total_loss: 0.4188656525479423
          vf_explained_var: 0.615522027015686
          vf_loss: 0.5999577907638417
    num_agent_steps_sampled: 14000
    num_agent_steps_trained: 14000
    num_steps_sampled: 14000
    num_steps_trained: 14000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,14,244.51,14000,-54.296,-45.2,-64,542.96


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 15000
  custom_metrics: {}
  date: 2021-10-09_21-43-26
  done: false
  episode_len_mean: 530.8214285714286
  episode_media: {}
  episode_reward_max: -36.300000000000246
  episode_reward_mean: -53.082142857143346
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 3
  episodes_total: 28
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.8966564959949916
          entropy_coeff: 0.009999999999999998
          kl: 0.012317307921302164
          policy_loss: 0.010679596289992332
          total_loss: 0.5422705931795968
          vf_explained_var: 0.7506063580513
          vf_loss: 0.5464004711972342
    num_agent_steps_sampled: 15000
    num_agent_steps_trained: 15000
    num_steps_sampled: 15000
    num_steps_trained: 150

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,15,262.39,15000,-53.0821,-36.3,-64,530.821




Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2021-10-09_21-44-01
  done: false
  episode_len_mean: 524.3333333333334
  episode_media: {}
  episode_reward_max: -36.300000000000246
  episode_reward_mean: -52.43333333333381
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 2
  episodes_total: 30
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9640616801049975
          entropy_coeff: 0.009999999999999998
          kl: 0.0087817882562713
          policy_loss: -0.04746115571922726
          total_loss: 0.4191859797471099
          vf_explained_var: 0.787265419960022
          vf_loss: 0.4833238970074389
    num_agent_steps_sampled: 16000
    num_agent_steps_trained: 16000
    num_steps_sampled: 16000
    num_steps_trained: 1600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,16,297.652,16000,-52.4333,-36.3,-64,524.333


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 17000
  custom_metrics: {}
  date: 2021-10-09_21-44-21
  done: false
  episode_len_mean: 522.78125
  episode_media: {}
  episode_reward_max: -36.300000000000246
  episode_reward_mean: -52.478125000000475
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 2
  episodes_total: 32
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.160564568307665
          entropy_coeff: 0.009999999999999998
          kl: 0.01928802825954262
          policy_loss: -0.09622615033553707
          total_loss: 0.9063408523797989
          vf_explained_var: 0.6060976982116699
          vf_loss: 1.017662951350212
    num_agent_steps_sampled: 17000
    num_agent_steps_trained: 17000
    num_steps_sampled: 17000
    num_steps_trained: 17000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,17,317.009,17000,-52.4781,-36.3,-64,522.781


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 18000
  custom_metrics: {}
  date: 2021-10-09_21-44-38
  done: false
  episode_len_mean: 522.5882352941177
  episode_media: {}
  episode_reward_max: -36.300000000000246
  episode_reward_mean: -52.658823529412246
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 2
  episodes_total: 34
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1023019499248927
          entropy_coeff: 0.009999999999999998
          kl: 0.020649540136417916
          policy_loss: -0.0032476163572735255
          total_loss: 0.9231698777940538
          vf_explained_var: 0.4732872545719147
          vf_loss: 0.9404713047875298
    num_agent_steps_sampled: 18000
    num_agent_steps_trained: 18000
    num_steps_sampled: 18000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,18,333.804,18000,-52.6588,-36.3,-64,522.588


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 19000
  custom_metrics: {}
  date: 2021-10-09_21-44-59
  done: false
  episode_len_mean: 517.9166666666666
  episode_media: {}
  episode_reward_max: -36.300000000000246
  episode_reward_mean: -52.19166666666714
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 2
  episodes_total: 36
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.102501567204793
          entropy_coeff: 0.009999999999999998
          kl: 0.013442778299188794
          policy_loss: -0.131264272166623
          total_loss: 0.6592761879165967
          vf_explained_var: 0.674710750579834
          vf_loss: 0.8047600693172878
    num_agent_steps_sampled: 19000
    num_agent_steps_trained: 19000
    num_steps_sampled: 19000
    num_steps_trained: 19000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,19,355.476,19000,-52.1917,-36.3,-64,517.917


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-10-09_21-45-18
  done: false
  episode_len_mean: 510.7435897435897
  episode_media: {}
  episode_reward_max: -36.300000000000246
  episode_reward_mean: -51.46153846153892
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 3
  episodes_total: 39
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.9263595408863492
          entropy_coeff: 0.009999999999999998
          kl: 0.004200479659693259
          policy_loss: 0.0093581047323015
          total_loss: 0.5124841400318676
          vf_explained_var: 0.8380680680274963
          vf_loss: 0.5202631426768171
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_steps_sampled: 20000
    num_steps_trained: 20000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,20,374.646,20000,-51.4615,-36.3,-64,510.744


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 21000
  custom_metrics: {}
  date: 2021-10-09_21-45-36
  done: false
  episode_len_mean: 510.5853658536585
  episode_media: {}
  episode_reward_max: -36.300000000000246
  episode_reward_mean: -51.50243902439071
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 2
  episodes_total: 41
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 2.2353843424055313
          entropy_coeff: 0.009999999999999998
          kl: 0.01688800767602048
          policy_loss: 0.032173023621241255
          total_loss: 0.549167098932796
          vf_explained_var: 0.5079111456871033
          vf_loss: 0.5350731456445323
    num_agent_steps_sampled: 21000
    num_agent_steps_trained: 21000
    num_steps_sampled: 21000
    num_steps_trained: 21000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,21,392.233,21000,-51.5024,-36.3,-64,510.585


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 22000
  custom_metrics: {}
  date: 2021-10-09_21-45-55
  done: false
  episode_len_mean: 508.93023255813955
  episode_media: {}
  episode_reward_max: -36.300000000000246
  episode_reward_mean: -51.28372093023302
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 2
  episodes_total: 43
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 2.106798301802741
          entropy_coeff: 0.009999999999999998
          kl: 0.010835478036245311
          policy_loss: -0.008011911229954826
          total_loss: 0.548278480850988
          vf_explained_var: 0.6041936874389648
          vf_loss: 0.5746156500859393
    num_agent_steps_sampled: 22000
    num_agent_steps_trained: 22000
    num_steps_sampled: 22000
    num_steps_trained: 22000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,22,410.909,22000,-51.2837,-36.3,-64,508.93


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 23000
  custom_metrics: {}
  date: 2021-10-09_21-46-11
  done: false
  episode_len_mean: 508.8888888888889
  episode_media: {}
  episode_reward_max: -36.300000000000246
  episode_reward_mean: -51.47333333333379
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 2
  episodes_total: 45
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 2.228655155499776
          entropy_coeff: 0.009999999999999998
          kl: 0.014543435001720518
          policy_loss: 0.07323275605837504
          total_loss: 0.920251762535837
          vf_explained_var: 0.4346863925457001
          vf_loss: 0.8656242529551188
    num_agent_steps_sampled: 23000
    num_agent_steps_trained: 23000
    num_steps_sampled: 23000
    num_steps_trained: 23000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,23,427.482,23000,-51.4733,-36.3,-64,508.889


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 24000
  custom_metrics: {}
  date: 2021-10-09_21-46-30
  done: false
  episode_len_mean: 505.72340425531917
  episode_media: {}
  episode_reward_max: -36.300000000000246
  episode_reward_mean: -51.13191489361747
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 2
  episodes_total: 47
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 2.10178224907981
          entropy_coeff: 0.009999999999999998
          kl: 0.008844130696868745
          policy_loss: -0.013350406040747961
          total_loss: 0.6259875532653597
          vf_explained_var: 0.7506561279296875
          vf_loss: 0.6581171169877053
    num_agent_steps_sampled: 24000
    num_agent_steps_trained: 24000
    num_steps_sampled: 24000
    num_steps_trained: 24000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,24,446.523,24000,-51.1319,-36.3,-64,505.723


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 25000
  custom_metrics: {}
  date: 2021-10-09_21-46-49
  done: false
  episode_len_mean: 503.7959183673469
  episode_media: {}
  episode_reward_max: -36.300000000000246
  episode_reward_mean: -50.90000000000045
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 2
  episodes_total: 49
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.253125
          cur_lr: 5.000000000000001e-05
          entropy: 2.054393830564287
          entropy_coeff: 0.009999999999999998
          kl: 0.020211616021907453
          policy_loss: -0.0449271146621969
          total_loss: 0.6154619554678599
          vf_explained_var: 0.783899188041687
          vf_loss: 0.6758169332726134
    num_agent_steps_sampled: 25000
    num_agent_steps_trained: 25000
    num_steps_sampled: 25000
    num_steps_trained: 25000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,25,464.675,25000,-50.9,-36.3,-64,503.796


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 26000
  custom_metrics: {}
  date: 2021-10-09_21-47-06
  done: false
  episode_len_mean: 502.84313725490193
  episode_media: {}
  episode_reward_max: -36.300000000000246
  episode_reward_mean: -50.79411764705927
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 2
  episodes_total: 51
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.0699371735254926
          entropy_coeff: 0.009999999999999998
          kl: 0.014924769289720126
          policy_loss: -0.0762119311425421
          total_loss: 0.6686217622624503
          vf_explained_var: 0.6756674647331238
          vf_loss: 0.7598663098282284
    num_agent_steps_sampled: 26000
    num_agent_steps_trained: 26000
    num_steps_sampled: 26000
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,26,482.3,26000,-50.7941,-36.3,-64,502.843


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 27000
  custom_metrics: {}
  date: 2021-10-09_21-47-28
  done: false
  episode_len_mean: 497.462962962963
  episode_media: {}
  episode_reward_max: -35.70000000000024
  episode_reward_mean: -50.187037037037484
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 3
  episodes_total: 54
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.9458345068825615
          entropy_coeff: 0.009999999999999998
          kl: 0.00931491688109034
          policy_loss: 0.1026957134405772
          total_loss: 0.5781308208902677
          vf_explained_var: 0.5016123652458191
          vf_loss: 0.4913566998309559
    num_agent_steps_sampled: 27000
    num_agent_steps_trained: 27000
    num_steps_sampled: 27000
    num_steps_trained: 27000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,27,503.953,27000,-50.187,-35.7,-64,497.463


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 28000
  custom_metrics: {}
  date: 2021-10-09_21-47-51
  done: false
  episode_len_mean: 490.7719298245614
  episode_media: {}
  episode_reward_max: -35.00000000000023
  episode_reward_mean: -49.49473684210569
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 3
  episodes_total: 57
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.9411522560649448
          entropy_coeff: 0.009999999999999998
          kl: 0.011518302966845716
          policy_loss: 0.005967400802506341
          total_loss: 0.6680805901686351
          vf_explained_var: 0.4502255320549011
          vf_loss: 0.6771513478623497
    num_agent_steps_sampled: 28000
    num_agent_steps_trained: 28000
    num_steps_sampled: 28000
    num_steps_trained: 28

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,28,526.707,28000,-49.4947,-35,-64,490.772




Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 29000
  custom_metrics: {}
  date: 2021-10-09_21-48-31
  done: false
  episode_len_mean: 483.23333333333335
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -48.72000000000042
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 3
  episodes_total: 60
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.9043640004263984
          entropy_coeff: 0.009999999999999998
          kl: 0.010205370808529936
          policy_loss: -0.009177690082126193
          total_loss: 0.7675500459141201
          vf_explained_var: 0.5658093094825745
          vf_loss: 0.7918965222934882
    num_agent_steps_sampled: 29000
    num_agent_steps_trained: 29000
    num_steps_sampled: 29000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,29,567.103,29000,-48.72,-27.7,-64,483.233


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 30000
  custom_metrics: {}
  date: 2021-10-09_21-48-55
  done: false
  episode_len_mean: 479.03225806451616
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -48.28709677419396
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 2
  episodes_total: 62
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.9573316706551447
          entropy_coeff: 0.009999999999999998
          kl: 0.00982629216524155
          policy_loss: -0.11412738396061792
          total_loss: 0.653442473212878
          vf_explained_var: 0.5345725417137146
          vf_loss: 0.7834122449159622
    num_agent_steps_sampled: 30000
    num_agent_steps_trained: 30000
    num_steps_sampled: 30000
    num_steps_trained: 30

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,30,591.282,30000,-48.2871,-27.7,-64,479.032


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 31000
  custom_metrics: {}
  date: 2021-10-09_21-49-17
  done: false
  episode_len_mean: 474.3538461538462
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -47.80153846153887
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 3
  episodes_total: 65
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.9874671207533943
          entropy_coeff: 0.009999999999999998
          kl: 0.00811639943051811
          policy_loss: 0.06259272752536668
          total_loss: 0.8514562868409686
          vf_explained_var: 0.4475015103816986
          vf_loss: 0.8056565299216244
    num_agent_steps_sampled: 31000
    num_agent_steps_trained: 31000
    num_steps_sampled: 31000
    num_steps_trained: 310

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,31,612.463,31000,-47.8015,-27.7,-64,474.354


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 32000
  custom_metrics: {}
  date: 2021-10-09_21-49-38
  done: false
  episode_len_mean: 469.3970588235294
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -47.28970588235333
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 3
  episodes_total: 68
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.9932178139686585
          entropy_coeff: 0.009999999999999998
          kl: 0.011011475211836668
          policy_loss: 0.021861753861109414
          total_loss: 0.9240744481484096
          vf_explained_var: -0.0689140036702156
          vf_loss: 0.917963960270087
    num_agent_steps_sampled: 32000
    num_agent_steps_trained: 32000
    num_steps_sampled: 32000
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,32,633.958,32000,-47.2897,-27.7,-64,469.397


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 33000
  custom_metrics: {}
  date: 2021-10-09_21-50-00
  done: false
  episode_len_mean: 464.76056338028167
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -46.811267605634185
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 3
  episodes_total: 71
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.9691729876730177
          entropy_coeff: 0.009999999999999998
          kl: 0.01191372856258314
          policy_loss: 0.013771491911676195
          total_loss: 0.7221578114562565
          vf_explained_var: 0.5251137614250183
          vf_loss: 0.7235545557406213
    num_agent_steps_sampled: 33000
    num_agent_steps_trained: 33000
    num_steps_sampled: 33000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,33,656.389,33000,-46.8113,-27.7,-64,464.761


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 34000
  custom_metrics: {}
  date: 2021-10-09_21-50-22
  done: false
  episode_len_mean: 462.26027397260276
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -46.552054794520934
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 2
  episodes_total: 73
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.9500460863113402
          entropy_coeff: 0.009999999999999998
          kl: 0.012578289635201086
          policy_loss: -0.06122422019640605
          total_loss: 0.8698786520295673
          vf_explained_var: 0.4047825336456299
          vf_loss: 0.9458275084694227
    num_agent_steps_sampled: 34000
    num_agent_steps_trained: 34000
    num_steps_sampled: 34000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,34,677.835,34000,-46.5521,-27.7,-64,462.26


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 35000
  custom_metrics: {}
  date: 2021-10-09_21-50-48
  done: false
  episode_len_mean: 457.13157894736844
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -46.02631578947407
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 3
  episodes_total: 76
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.7983195490307278
          entropy_coeff: 0.009999999999999998
          kl: 0.009092985089838615
          policy_loss: -0.07037640313307444
          total_loss: 1.0627915302912394
          vf_explained_var: 0.5226301550865173
          vf_loss: 1.1476986255910662
    num_agent_steps_sampled: 35000
    num_agent_steps_trained: 35000
    num_steps_sampled: 35000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,35,703.707,35000,-46.0263,-27.7,-64,457.132


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 36000
  custom_metrics: {}
  date: 2021-10-09_21-51-12
  done: false
  episode_len_mean: 451.9240506329114
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -45.49367088607632
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 3
  episodes_total: 79
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.6760740253660413
          entropy_coeff: 0.009999999999999998
          kl: 0.0063294320572967735
          policy_loss: -0.09740730979376369
          total_loss: 0.5504085911644829
          vf_explained_var: 0.8195704817771912
          vf_loss: 0.6621734400590261
    num_agent_steps_sampled: 36000
    num_agent_steps_trained: 36000
    num_steps_sampled: 36000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,36,727.635,36000,-45.4937,-27.7,-64,451.924


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 37000
  custom_metrics: {}
  date: 2021-10-09_21-51-34
  done: false
  episode_len_mean: 448.8658536585366
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -45.17682926829305
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 3
  episodes_total: 82
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.8350398725933499
          entropy_coeff: 0.009999999999999998
          kl: 0.01097473731536197
          policy_loss: -0.04011894538998604
          total_loss: 0.39322917229599424
          vf_explained_var: 0.7785560488700867
          vf_loss: 0.44753154582447474
    num_agent_steps_sampled: 37000
    num_agent_steps_trained: 37000
    num_steps_sampled: 37000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,37,749.944,37000,-45.1768,-27.7,-64,448.866


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 38000
  custom_metrics: {}
  date: 2021-10-09_21-52-00
  done: false
  episode_len_mean: 444.50588235294117
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -44.73058823529448
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 3
  episodes_total: 85
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.8151248627238803
          entropy_coeff: 0.009999999999999998
          kl: 0.010426505586619329
          policy_loss: 0.013093307945463392
          total_loss: 0.4724164120025105
          vf_explained_var: 0.8405457139015198
          vf_loss: 0.4735155345665084
    num_agent_steps_sampled: 38000
    num_agent_steps_trained: 38000
    num_steps_sampled: 38000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,38,775.435,38000,-44.7306,-27.7,-64,444.506


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 39000
  custom_metrics: {}
  date: 2021-10-09_21-52-24
  done: false
  episode_len_mean: 440.34090909090907
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -44.30454545454581
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 3
  episodes_total: 88
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.8008758889304266
          entropy_coeff: 0.009999999999999998
          kl: 0.008408555288075013
          policy_loss: -0.04895925753646427
          total_loss: 0.8659470114443037
          vf_explained_var: 0.578240156173706
          vf_loss: 0.9297224031554328
    num_agent_steps_sampled: 39000
    num_agent_steps_trained: 39000
    num_steps_sampled: 39000
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,39,799.745,39000,-44.3045,-27.7,-64,440.341




Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2021-10-09_21-53-05
  done: false
  episode_len_mean: 436.5054945054945
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -43.91208791208826
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 3
  episodes_total: 91
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.737001609802246
          entropy_coeff: 0.009999999999999998
          kl: 0.007267331520050351
          policy_loss: -0.07009933408763673
          total_loss: 0.6155373142825232
          vf_explained_var: 0.6677569150924683
          vf_loss: 0.7002473437123828
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_steps_sampled: 40000
    num_steps_trained: 40

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,40,840.999,40000,-43.9121,-27.7,-64,436.505


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 41000
  custom_metrics: {}
  date: 2021-10-09_21-53-32
  done: false
  episode_len_mean: 431.2315789473684
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -43.37368421052666
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 4
  episodes_total: 95
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.5262902683681911
          entropy_coeff: 0.009999999999999998
          kl: 0.0052532847997676865
          policy_loss: -0.03928265016939905
          total_loss: 0.4347560207049052
          vf_explained_var: 0.8780453205108643
          vf_loss: 0.4873069680399365
    num_agent_steps_sampled: 41000
    num_agent_steps_trained: 41000
    num_steps_sampled: 41000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,41,867.503,41000,-43.3737,-27.7,-64,431.232


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 42000
  custom_metrics: {}
  date: 2021-10-09_21-53-56
  done: false
  episode_len_mean: 428.2448979591837
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -43.06734693877585
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 3
  episodes_total: 98
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.7853638423813714
          entropy_coeff: 0.009999999999999998
          kl: 0.01617741837293922
          policy_loss: 0.01998936790559027
          total_loss: 0.25848158357871903
          vf_explained_var: 0.9446802735328674
          vf_loss: 0.2502034905884001
    num_agent_steps_sampled: 42000
    num_agent_steps_trained: 42000
    num_steps_sampled: 42000
    num_steps_trained: 42

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,42,891.606,42000,-43.0673,-27.7,-64,428.245


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 43000
  custom_metrics: {}
  date: 2021-10-09_21-54-22
  done: false
  episode_len_mean: 423.63
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -42.60100000000033
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 3
  episodes_total: 101
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.6356724964247809
          entropy_coeff: 0.009999999999999998
          kl: 0.008558793435173071
          policy_loss: 0.0613114879363113
          total_loss: 0.4004164681666427
          vf_explained_var: 0.876855194568634
          vf_loss: 0.35221203598711226
    num_agent_steps_sampled: 43000
    num_agent_steps_trained: 43000
    num_steps_sampled: 43000
    num_steps_trained: 43000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,43,917.296,43000,-42.601,-27.7,-64,423.63


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 44000
  custom_metrics: {}
  date: 2021-10-09_21-54-45
  done: false
  episode_len_mean: 419.02
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -42.14000000000032
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 3
  episodes_total: 104
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3796875000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.8803703917397394
          entropy_coeff: 0.009999999999999998
          kl: 0.030427328131540025
          policy_loss: -0.034681281157665785
          total_loss: 0.1063942385216554
          vf_explained_var: 0.976069986820221
          vf_loss: 0.14832634689907234
    num_agent_steps_sampled: 44000
    num_agent_steps_trained: 44000
    num_steps_sampled: 44000
    num_steps_trained: 44000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,44,941.13,44000,-42.14,-27.7,-64,419.02


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 45000
  custom_metrics: {}
  date: 2021-10-09_21-55-08
  done: false
  episode_len_mean: 415.13
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -41.751000000000325
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 2
  episodes_total: 106
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8246644934018452
          entropy_coeff: 0.009999999999999998
          kl: 0.01563561029974877
          policy_loss: 0.00910200575987498
          total_loss: 0.2106970423211654
          vf_explained_var: 0.9415456056594849
          vf_loss: 0.21093671491576566
    num_agent_steps_sampled: 45000
    num_agent_steps_trained: 45000
    num_steps_sampled: 45000
    num_steps_trained: 45000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,45,964.062,45000,-41.751,-27.7,-64,415.13


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 46000
  custom_metrics: {}
  date: 2021-10-09_21-55-29
  done: false
  episode_len_mean: 411.05
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -41.34300000000031
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 3
  episodes_total: 109
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.039301155673133
          entropy_coeff: 0.009999999999999998
          kl: 0.010096572644339118
          policy_loss: -0.08162537415822348
          total_loss: 0.25781638564334974
          vf_explained_var: 0.7590982913970947
          vf_loss: 0.3540844584504763
    num_agent_steps_sampled: 46000
    num_agent_steps_trained: 46000
    num_steps_sampled: 46000
    num_steps_trained: 46000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,46,984.631,46000,-41.343,-27.7,-64,411.05


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 47000
  custom_metrics: {}
  date: 2021-10-09_21-55-49
  done: false
  episode_len_mean: 410.17
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -41.255000000000315
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 2
  episodes_total: 111
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9652636978361342
          entropy_coeff: 0.009999999999999998
          kl: 0.008119518024797361
          policy_loss: 0.11208021168907484
          total_loss: 0.30656674719519084
          vf_explained_var: 0.9170020818710327
          vf_loss: 0.2095148532754845
    num_agent_steps_sampled: 47000
    num_agent_steps_trained: 47000
    num_steps_sampled: 47000
    num_steps_trained: 47000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,47,1005.09,47000,-41.255,-27.7,-64,410.17


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 48000
  custom_metrics: {}
  date: 2021-10-09_21-56-08
  done: false
  episode_len_mean: 409.7
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -41.20800000000032
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 2
  episodes_total: 113
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.1435962292883133
          entropy_coeff: 0.009999999999999998
          kl: 0.008334116767951254
          policy_loss: 0.07305906431542503
          total_loss: 0.5442634800242053
          vf_explained_var: 0.26417356729507446
          vf_loss: 0.48789383934603797
    num_agent_steps_sampled: 48000
    num_agent_steps_trained: 48000
    num_steps_sampled: 48000
    num_steps_trained: 48000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,48,1023.63,48000,-41.208,-27.7,-64,409.7


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 49000
  custom_metrics: {}
  date: 2021-10-09_21-56-26
  done: false
  episode_len_mean: 407.75
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -41.01300000000031
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 2
  episodes_total: 115
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.1908050960964625
          entropy_coeff: 0.009999999999999998
          kl: 0.01012051038408208
          policy_loss: -0.05652474938995308
          total_loss: 0.4658586450748973
          vf_explained_var: 0.777675986289978
          vf_loss: 0.5385275040235784
    num_agent_steps_sampled: 49000
    num_agent_steps_trained: 49000
    num_steps_sampled: 49000
    num_steps_trained: 49000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,49,1041.2,49000,-41.013,-27.7,-64,407.75


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 50000
  custom_metrics: {}
  date: 2021-10-09_21-56-43
  done: false
  episode_len_mean: 406.45
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -40.88300000000031
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 2
  episodes_total: 117
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.1861447042889064
          entropy_coeff: 0.009999999999999998
          kl: 0.00847109909474963
          policy_loss: -0.06938267631663217
          total_loss: 0.5775346640083525
          vf_explained_var: 0.6911901831626892
          vf_loss: 0.663954238138265
    num_agent_steps_sampled: 50000
    num_agent_steps_trained: 50000
    num_steps_sampled: 50000
    num_steps_trained: 50000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,50,1058.19,50000,-40.883,-27.7,-64,406.45


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 51000
  custom_metrics: {}
  date: 2021-10-09_21-56-59
  done: false
  episode_len_mean: 404.14
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -40.652000000000314
  episode_reward_min: -64.00000000000064
  episodes_this_iter: 2
  episodes_total: 119
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.1581727849112617
          entropy_coeff: 0.009999999999999998
          kl: 0.00578170648519117
          policy_loss: -0.12790673275788625
          total_loss: 0.5679368125067816
          vf_explained_var: 0.09014052152633667
          vf_loss: 0.7141324101222886
    num_agent_steps_sampled: 51000
    num_agent_steps_trained: 51000
    num_steps_sampled: 51000
    num_steps_trained: 51000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,51,1074.86,51000,-40.652,-27.7,-64,404.14




Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 52000
  custom_metrics: {}
  date: 2021-10-09_21-57-34
  done: false
  episode_len_mean: 401.02
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -40.3400000000003
  episode_reward_min: -62.00000000000055
  episodes_this_iter: 2
  episodes_total: 121
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.147787425253126
          entropy_coeff: 0.009999999999999998
          kl: 0.007125164759164903
          policy_loss: -0.06005838844511244
          total_loss: 0.7154328465461731
          vf_explained_var: 0.3778996765613556
          vf_loss: 0.7929111086659961
    num_agent_steps_sampled: 52000
    num_agent_steps_trained: 52000
    num_steps_sampled: 52000
    num_steps_trained: 52000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,52,1109.45,52000,-40.34,-27.7,-62,401.02


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 53000
  custom_metrics: {}
  date: 2021-10-09_21-57-50
  done: false
  episode_len_mean: 398.79
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -40.0940000000003
  episode_reward_min: -62.00000000000055
  episodes_this_iter: 3
  episodes_total: 124
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.131542082627614
          entropy_coeff: 0.009999999999999998
          kl: 0.012687394822211021
          policy_loss: -0.025143651829825506
          total_loss: 0.8133208925525347
          vf_explained_var: 0.5456615686416626
          vf_loss: 0.8525541102720632
    num_agent_steps_sampled: 53000
    num_agent_steps_trained: 53000
    num_steps_sampled: 53000
    num_steps_trained: 53000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,53,1125.99,53000,-40.094,-27.7,-62,398.79


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 54000
  custom_metrics: {}
  date: 2021-10-09_21-58-06
  done: false
  episode_len_mean: 399.03
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -40.1180000000003
  episode_reward_min: -62.00000000000055
  episodes_this_iter: 1
  episodes_total: 125
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0822729931937323
          entropy_coeff: 0.009999999999999998
          kl: 0.008244365119240468
          policy_loss: -0.14581242174737982
          total_loss: 0.3512969901578294
          vf_explained_var: 0.6676865220069885
          vf_loss: 0.5132367073661751
    num_agent_steps_sampled: 54000
    num_agent_steps_trained: 54000
    num_steps_sampled: 54000
    num_steps_trained: 54000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,54,1141.46,54000,-40.118,-27.7,-62,399.03


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 55000
  custom_metrics: {}
  date: 2021-10-09_21-58-23
  done: false
  episode_len_mean: 400.88
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -40.30300000000031
  episode_reward_min: -62.00000000000055
  episodes_this_iter: 3
  episodes_total: 128
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.075615378220876
          entropy_coeff: 0.009999999999999998
          kl: 0.006908033913039022
          policy_loss: 0.03954378763834635
          total_loss: 0.621994104112188
          vf_explained_var: 0.6130428910255432
          vf_loss: 0.5992721346517403
    num_agent_steps_sampled: 55000
    num_agent_steps_trained: 55000
    num_steps_sampled: 55000
    num_steps_trained: 55000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,55,1158.19,55000,-40.303,-27.7,-62,400.88


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 56000
  custom_metrics: {}
  date: 2021-10-09_21-58-38
  done: false
  episode_len_mean: 402.43
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -40.458000000000304
  episode_reward_min: -62.00000000000055
  episodes_this_iter: 2
  episodes_total: 130
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8092089388105603
          entropy_coeff: 0.009999999999999998
          kl: 0.009829429845432043
          policy_loss: 0.11363618593249056
          total_loss: 0.25625798896782925
          vf_explained_var: 0.8377797603607178
          vf_loss: 0.15511572683850924
    num_agent_steps_sampled: 56000
    num_agent_steps_trained: 56000
    num_steps_sampled: 56000
    num_steps_trained: 56000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,56,1173.64,56000,-40.458,-27.7,-62,402.43


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 57000
  custom_metrics: {}
  date: 2021-10-09_21-58-56
  done: false
  episode_len_mean: 401.83
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -40.334000000000295
  episode_reward_min: -62.00000000000055
  episodes_this_iter: 2
  episodes_total: 132
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.123471956782871
          entropy_coeff: 0.009999999999999998
          kl: 0.005531791952262862
          policy_loss: 0.08648552331659529
          total_loss: 0.7289005551073287
          vf_explained_var: -0.34066012501716614
          vf_loss: 0.6604992133047846
    num_agent_steps_sampled: 57000
    num_agent_steps_trained: 57000
    num_steps_sampled: 57000
    num_steps_trained: 57000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,57,1190.96,57000,-40.334,-27.7,-62,401.83


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 58000
  custom_metrics: {}
  date: 2021-10-09_21-59-12
  done: false
  episode_len_mean: 400.9
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -40.1690000000003
  episode_reward_min: -62.00000000000055
  episodes_this_iter: 2
  episodes_total: 134
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.1491546829541526
          entropy_coeff: 0.009999999999999998
          kl: 0.004358949599387035
          policy_loss: 0.09965233537885877
          total_loss: 0.6599284920427534
          vf_explained_var: -0.21627317368984222
          vf_loss: 0.5792851437607573
    num_agent_steps_sampled: 58000
    num_agent_steps_trained: 58000
    num_steps_sampled: 58000
    num_steps_trained: 58000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,58,1207.74,58000,-40.169,-27.7,-62,400.9


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 59000
  custom_metrics: {}
  date: 2021-10-09_21-59-29
  done: false
  episode_len_mean: 401.36
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -40.2070000000003
  episode_reward_min: -62.00000000000055
  episodes_this_iter: 2
  episodes_total: 136
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.15542604525884
          entropy_coeff: 0.009999999999999998
          kl: 0.010967853677526607
          policy_loss: 0.019697845230499902
          total_loss: 0.653761335545116
          vf_explained_var: -0.1710469126701355
          vf_loss: 0.6524944908089108
    num_agent_steps_sampled: 59000
    num_agent_steps_trained: 59000
    num_steps_sampled: 59000
    num_steps_trained: 59000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,59,1224.82,59000,-40.207,-27.7,-62,401.36


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2021-10-09_21-59-48
  done: false
  episode_len_mean: 401.85
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -40.256000000000306
  episode_reward_min: -62.00000000000055
  episodes_this_iter: 2
  episodes_total: 138
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.034890209303962
          entropy_coeff: 0.009999999999999998
          kl: 0.008560083596067712
          policy_loss: -0.08711452649699317
          total_loss: 1.2408103578620486
          vf_explained_var: -0.03783407062292099
          vf_loss: 1.3458361848360962
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_steps_sampled: 60000
    num_steps_trained: 60000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,60,1242.93,60000,-40.256,-27.7,-62,401.85


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 61000
  custom_metrics: {}
  date: 2021-10-09_22-00-06
  done: false
  episode_len_mean: 400.65
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -40.0980000000003
  episode_reward_min: -62.00000000000055
  episodes_this_iter: 3
  episodes_total: 141
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9878830605083042
          entropy_coeff: 0.009999999999999998
          kl: 0.008583159674769513
          policy_loss: 0.015368676516744825
          total_loss: 1.2919688549306658
          vf_explained_var: 0.13307806849479675
          vf_loss: 1.294034821457333
    num_agent_steps_sampled: 61000
    num_agent_steps_trained: 61000
    num_steps_sampled: 61000
    num_steps_trained: 61000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,61,1260.93,61000,-40.098,-27.7,-62,400.65


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 62000
  custom_metrics: {}
  date: 2021-10-09_22-00-25
  done: false
  episode_len_mean: 399.34
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -39.98100000000029
  episode_reward_min: -62.00000000000055
  episodes_this_iter: 2
  episodes_total: 143
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.037040620379978
          entropy_coeff: 0.009999999999999998
          kl: 0.009103510756029342
          policy_loss: 0.07812299985024664
          total_loss: 0.5956764355301857
          vf_explained_var: 0.5151048302650452
          vf_loss: 0.5353314687601394
    num_agent_steps_sampled: 62000
    num_agent_steps_trained: 62000
    num_steps_sampled: 62000
    num_steps_trained: 62000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,62,1280.18,62000,-39.981,-27.7,-62,399.34


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 63000
  custom_metrics: {}
  date: 2021-10-09_22-00-43
  done: false
  episode_len_mean: 397.97
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -39.749000000000294
  episode_reward_min: -54.1000000000005
  episodes_this_iter: 2
  episodes_total: 145
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0628636439641315
          entropy_coeff: 0.009999999999999998
          kl: 0.010821728522433608
          policy_loss: -0.0858194856180085
          total_loss: 1.2004126409689586
          vf_explained_var: 0.288672536611557
          vf_loss: 1.3037790900303259
    num_agent_steps_sampled: 63000
    num_agent_steps_trained: 63000
    num_steps_sampled: 63000
    num_steps_trained: 63000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,63,1297.91,63000,-39.749,-27.7,-54.1,397.97


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 64000
  custom_metrics: {}
  date: 2021-10-09_22-01-01
  done: false
  episode_len_mean: 398.06
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -39.758000000000294
  episode_reward_min: -54.1000000000005
  episodes_this_iter: 2
  episodes_total: 147
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.052139268981086
          entropy_coeff: 0.009999999999999998
          kl: 0.014457054935789977
          policy_loss: -0.12988973491721684
          total_loss: 1.0641609986623128
          vf_explained_var: 0.14465342462062836
          vf_loss: 1.210455266634623
    num_agent_steps_sampled: 64000
    num_agent_steps_trained: 64000
    num_steps_sampled: 64000
    num_steps_trained: 64000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,64,1315.99,64000,-39.758,-27.7,-54.1,398.06




Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 65000
  custom_metrics: {}
  date: 2021-10-09_22-01-37
  done: false
  episode_len_mean: 396.17
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -39.5720000000003
  episode_reward_min: -54.1000000000005
  episodes_this_iter: 3
  episodes_total: 150
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0689600083563064
          entropy_coeff: 0.009999999999999998
          kl: 0.009594358430689611
          policy_loss: 0.023103589316209157
          total_loss: 1.057578941517406
          vf_explained_var: 0.4577123820781708
          vf_loss: 1.0524328203664886
    num_agent_steps_sampled: 65000
    num_agent_steps_trained: 65000
    num_steps_sampled: 65000
    num_steps_trained: 65000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,65,1351.86,65000,-39.572,-27.7,-54.1,396.17


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 66000
  custom_metrics: {}
  date: 2021-10-09_22-01-56
  done: false
  episode_len_mean: 395.87
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -39.5640000000003
  episode_reward_min: -54.1000000000005
  episodes_this_iter: 2
  episodes_total: 152
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9513062371148004
          entropy_coeff: 0.009999999999999998
          kl: 0.010777636335551193
          policy_loss: -0.06571590469943153
          total_loss: 0.8590329286538892
          vf_explained_var: 0.585188090801239
          vf_loss: 0.9411927934322092
    num_agent_steps_sampled: 66000
    num_agent_steps_trained: 66000
    num_steps_sampled: 66000
    num_steps_trained: 66000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,66,1371.27,66000,-39.564,-27.7,-54.1,395.87


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 67000
  custom_metrics: {}
  date: 2021-10-09_22-02-14
  done: false
  episode_len_mean: 397.24
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -39.70100000000029
  episode_reward_min: -54.1000000000005
  episodes_this_iter: 3
  episodes_total: 155
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9186053660180833
          entropy_coeff: 0.009999999999999998
          kl: 0.007645272145573046
          policy_loss: -0.0020075139072206283
          total_loss: 1.0141999145348868
          vf_explained_var: 0.0696999654173851
          vf_loss: 1.0332163698143428
    num_agent_steps_sampled: 67000
    num_agent_steps_trained: 67000
    num_steps_sampled: 67000
    num_steps_trained: 67000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,67,1389.43,67000,-39.701,-27.7,-54.1,397.24


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 68000
  custom_metrics: {}
  date: 2021-10-09_22-02-32
  done: false
  episode_len_mean: 399.05
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -39.8820000000003
  episode_reward_min: -54.1000000000005
  episodes_this_iter: 2
  episodes_total: 157
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1004348516464235
          entropy_coeff: 0.009999999999999998
          kl: 0.010303660046732412
          policy_loss: 0.09761639502313402
          total_loss: 0.6457213870353169
          vf_explained_var: 0.7358344793319702
          vf_loss: 0.5661752249010735
    num_agent_steps_sampled: 68000
    num_agent_steps_trained: 68000
    num_steps_sampled: 68000
    num_steps_trained: 68000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,68,1406.82,68000,-39.882,-27.7,-54.1,399.05


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 69000
  custom_metrics: {}
  date: 2021-10-09_22-02-50
  done: false
  episode_len_mean: 399.81
  episode_media: {}
  episode_reward_max: -27.700000000000124
  episode_reward_mean: -39.958000000000304
  episode_reward_min: -54.1000000000005
  episodes_this_iter: 2
  episodes_total: 159
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.161374558342828
          entropy_coeff: 0.009999999999999998
          kl: 0.008907546313044624
          policy_loss: -0.08045660571919547
          total_loss: 1.0796540922588773
          vf_explained_var: 0.17223265767097473
          vf_loss: 1.1791878948609034
    num_agent_steps_sampled: 69000
    num_agent_steps_trained: 69000
    num_steps_sampled: 69000
    num_steps_trained: 69000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,69,1425.65,69000,-39.958,-27.7,-54.1,399.81


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 70000
  custom_metrics: {}
  date: 2021-10-09_22-03-09
  done: false
  episode_len_mean: 402.76
  episode_media: {}
  episode_reward_max: -28.20000000000013
  episode_reward_mean: -40.253000000000306
  episode_reward_min: -54.1000000000005
  episodes_this_iter: 3
  episodes_total: 162
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0817906313472325
          entropy_coeff: 0.009999999999999998
          kl: 0.010252013754585064
          policy_loss: 0.008621711035569508
          total_loss: 1.1062114854653677
          vf_explained_var: 0.020898794755339622
          vf_loss: 1.1154882530371348
    num_agent_steps_sampled: 70000
    num_agent_steps_trained: 70000
    num_steps_sampled: 70000
    num_steps_trained: 70000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,70,1444.01,70000,-40.253,-28.2,-54.1,402.76


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 71000
  custom_metrics: {}
  date: 2021-10-09_22-03-27
  done: false
  episode_len_mean: 403.74
  episode_media: {}
  episode_reward_max: -28.20000000000013
  episode_reward_mean: -40.351000000000305
  episode_reward_min: -54.1000000000005
  episodes_this_iter: 2
  episodes_total: 164
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0151816606521606
          entropy_coeff: 0.009999999999999998
          kl: 0.007628681778541161
          policy_loss: 0.10580744900637203
          total_loss: 0.666815181904369
          vf_explained_var: 0.40879881381988525
          vf_loss: 0.578987159828345
    num_agent_steps_sampled: 71000
    num_agent_steps_trained: 71000
    num_steps_sampled: 71000
    num_steps_trained: 71000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,71,1462.31,71000,-40.351,-28.2,-54.1,403.74


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 72000
  custom_metrics: {}
  date: 2021-10-09_22-03-46
  done: false
  episode_len_mean: 404.93
  episode_media: {}
  episode_reward_max: -28.20000000000013
  episode_reward_mean: -40.47000000000031
  episode_reward_min: -54.1000000000005
  episodes_this_iter: 2
  episodes_total: 166
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.055369422170851
          entropy_coeff: 0.009999999999999998
          kl: 0.014707580410842963
          policy_loss: -0.10647326757510503
          total_loss: 0.8879651231898202
          vf_explained_var: 0.48361650109291077
          vf_loss: 1.0108038663864136
    num_agent_steps_sampled: 72000
    num_agent_steps_trained: 72000
    num_steps_sampled: 72000
    num_steps_trained: 72000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,72,1481.32,72000,-40.47,-28.2,-54.1,404.93


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 73000
  custom_metrics: {}
  date: 2021-10-09_22-04-05
  done: false
  episode_len_mean: 406.64
  episode_media: {}
  episode_reward_max: -28.20000000000013
  episode_reward_mean: -40.64100000000031
  episode_reward_min: -54.1000000000005
  episodes_this_iter: 2
  episodes_total: 168
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9609473360909355
          entropy_coeff: 0.009999999999999998
          kl: 0.01518699582587764
          policy_loss: -0.1415975605448087
          total_loss: 1.0906041201617982
          vf_explained_var: 0.1009545549750328
          vf_loss: 1.2474864184028573
    num_agent_steps_sampled: 73000
    num_agent_steps_trained: 73000
    num_steps_sampled: 73000
    num_steps_trained: 73000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,73,1500.44,73000,-40.641,-28.2,-54.1,406.64


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 74000
  custom_metrics: {}
  date: 2021-10-09_22-04-23
  done: false
  episode_len_mean: 409.41
  episode_media: {}
  episode_reward_max: -28.20000000000013
  episode_reward_mean: -40.91800000000031
  episode_reward_min: -54.1000000000005
  episodes_this_iter: 3
  episodes_total: 171
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9527940246793958
          entropy_coeff: 0.009999999999999998
          kl: 0.0070379757483983955
          policy_loss: 0.012872003846698336
          total_loss: 1.0691691163513395
          vf_explained_var: 0.562126100063324
          vf_loss: 1.0738208825389544
    num_agent_steps_sampled: 74000
    num_agent_steps_trained: 74000
    num_steps_sampled: 74000
    num_steps_trained: 74000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,74,1517.94,74000,-40.918,-28.2,-54.1,409.41


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 75000
  custom_metrics: {}
  date: 2021-10-09_22-04-42
  done: false
  episode_len_mean: 410.73
  episode_media: {}
  episode_reward_max: -28.20000000000013
  episode_reward_mean: -41.05000000000031
  episode_reward_min: -54.1000000000005
  episodes_this_iter: 2
  episodes_total: 173
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9631200671195983
          entropy_coeff: 0.009999999999999998
          kl: 0.007649669556396511
          policy_loss: 0.08134256402651469
          total_loss: 0.6777900310026275
          vf_explained_var: 0.16140763461589813
          vf_loss: 0.6139003054549297
    num_agent_steps_sampled: 75000
    num_agent_steps_trained: 75000
    num_steps_sampled: 75000
    num_steps_trained: 75000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,75,1536.68,75000,-41.05,-28.2,-54.1,410.73


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 76000
  custom_metrics: {}
  date: 2021-10-09_22-05-00
  done: false
  episode_len_mean: 412.93
  episode_media: {}
  episode_reward_max: -28.20000000000013
  episode_reward_mean: -41.27000000000031
  episode_reward_min: -54.1000000000005
  episodes_this_iter: 2
  episodes_total: 175
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.918579720126258
          entropy_coeff: 0.009999999999999998
          kl: 0.008056658340915214
          policy_loss: -0.0524515536096361
          total_loss: 1.0901817076736027
          vf_explained_var: 0.27545472979545593
          vf_loss: 1.1595248156123692
    num_agent_steps_sampled: 76000
    num_agent_steps_trained: 76000
    num_steps_sampled: 76000
    num_steps_trained: 76000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,76,1554.9,76000,-41.27,-28.2,-54.1,412.93


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 77000
  custom_metrics: {}
  date: 2021-10-09_22-05-18
  done: false
  episode_len_mean: 415.16
  episode_media: {}
  episode_reward_max: -28.20000000000013
  episode_reward_mean: -41.49300000000033
  episode_reward_min: -54.1000000000005
  episodes_this_iter: 2
  episodes_total: 177
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.9143878724839953
          entropy_coeff: 0.009999999999999998
          kl: 0.0071142194659304415
          policy_loss: -0.13642967409557766
          total_loss: 0.9418503734800551
          vf_explained_var: 0.5150964260101318
          vf_loss: 1.0953980321685473
    num_agent_steps_sampled: 77000
    num_agent_steps_trained: 77000
    num_steps_sampled: 77000
    num_steps_trained: 77000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,77,1572.61,77000,-41.493,-28.2,-54.1,415.16




Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 78000
  custom_metrics: {}
  date: 2021-10-09_22-05-52
  done: false
  episode_len_mean: 418.0
  episode_media: {}
  episode_reward_max: -28.20000000000013
  episode_reward_mean: -41.777000000000314
  episode_reward_min: -54.1000000000005
  episodes_this_iter: 3
  episodes_total: 180
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.8984206901656258
          entropy_coeff: 0.009999999999999998
          kl: 0.004528923469725073
          policy_loss: 0.049490080773830415
          total_loss: 1.0835717734363344
          vf_explained_var: 0.5526365041732788
          vf_loss: 1.0517762175036802
    num_agent_steps_sampled: 78000
    num_agent_steps_trained: 78000
    num_steps_sampled: 78000
    num_steps_trained: 78000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,78,1607.46,78000,-41.777,-28.2,-54.1,418


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 79000
  custom_metrics: {}
  date: 2021-10-09_22-06-13
  done: false
  episode_len_mean: 419.47
  episode_media: {}
  episode_reward_max: -28.20000000000013
  episode_reward_mean: -41.924000000000326
  episode_reward_min: -54.1000000000005
  episodes_this_iter: 2
  episodes_total: 182
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14238281249999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.971814849641588
          entropy_coeff: 0.009999999999999998
          kl: 0.01308568899825359
          policy_loss: -0.0460116962591807
          total_loss: 1.001903235912323
          vf_explained_var: 0.29148826003074646
          vf_loss: 1.065769895994001
    num_agent_steps_sampled: 79000
    num_agent_steps_trained: 79000
    num_steps_sampled: 79000
    num_steps_trained: 79000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,79,1628.31,79000,-41.924,-28.2,-54.1,419.47


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 80000
  custom_metrics: {}
  date: 2021-10-09_22-06-33
  done: false
  episode_len_mean: 421.41
  episode_media: {}
  episode_reward_max: -28.20000000000013
  episode_reward_mean: -42.11800000000033
  episode_reward_min: -54.1000000000005
  episodes_this_iter: 2
  episodes_total: 184
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14238281249999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9881000757217406
          entropy_coeff: 0.009999999999999998
          kl: 0.014037733210479865
          policy_loss: -0.10595210376713012
          total_loss: 0.6724559259083536
          vf_explained_var: 0.5548824667930603
          vf_loss: 0.796290300703711
    num_agent_steps_sampled: 80000
    num_agent_steps_trained: 80000
    num_steps_sampled: 80000
    num_steps_trained: 80000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,80,1647.88,80000,-42.118,-28.2,-54.1,421.41


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 81000
  custom_metrics: {}
  date: 2021-10-09_22-06-53
  done: false
  episode_len_mean: 423.97
  episode_media: {}
  episode_reward_max: -28.20000000000013
  episode_reward_mean: -42.37400000000034
  episode_reward_min: -54.1000000000005
  episodes_this_iter: 3
  episodes_total: 187
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14238281249999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.942355247338613
          entropy_coeff: 0.009999999999999998
          kl: 0.013673070268591481
          policy_loss: -0.019775384995672437
          total_loss: 1.1126318246126174
          vf_explained_var: 0.3141019940376282
          vf_loss: 1.149883963747157
    num_agent_steps_sampled: 81000
    num_agent_steps_trained: 81000
    num_steps_sampled: 81000
    num_steps_trained: 81000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,81,1668.23,81000,-42.374,-28.2,-54.1,423.97


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 82000
  custom_metrics: {}
  date: 2021-10-09_22-07-15
  done: false
  episode_len_mean: 425.58
  episode_media: {}
  episode_reward_max: -28.20000000000013
  episode_reward_mean: -42.535000000000345
  episode_reward_min: -54.1000000000005
  episodes_this_iter: 2
  episodes_total: 189
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14238281249999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9124355726771884
          entropy_coeff: 0.009999999999999998
          kl: 0.016038637460927024
          policy_loss: -0.12778589361243778
          total_loss: 0.9803591691785388
          vf_explained_var: 0.43339967727661133
          vf_loss: 1.1249857889281378
    num_agent_steps_sampled: 82000
    num_agent_steps_trained: 82000
    num_steps_sampled: 82000
    num_steps_trained: 82000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,82,1689.85,82000,-42.535,-28.2,-54.1,425.58


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 83000
  custom_metrics: {}
  date: 2021-10-09_22-07-34
  done: false
  episode_len_mean: 428.19
  episode_media: {}
  episode_reward_max: -28.20000000000013
  episode_reward_mean: -42.79600000000034
  episode_reward_min: -54.1000000000005
  episodes_this_iter: 3
  episodes_total: 192
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14238281249999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.895238881640964
          entropy_coeff: 0.009999999999999998
          kl: 0.009300669661712746
          policy_loss: 0.046633350849151614
          total_loss: 1.1614609407054053
          vf_explained_var: 0.4194941520690918
          vf_loss: 1.132455710735586
    num_agent_steps_sampled: 83000
    num_agent_steps_trained: 83000
    num_steps_sampled: 83000
    num_steps_trained: 83000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,83,1709.15,83000,-42.796,-28.2,-54.1,428.19


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 84000
  custom_metrics: {}
  date: 2021-10-09_22-07-53
  done: false
  episode_len_mean: 430.31
  episode_media: {}
  episode_reward_max: -28.20000000000013
  episode_reward_mean: -43.00800000000034
  episode_reward_min: -54.1000000000005
  episodes_this_iter: 2
  episodes_total: 194
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14238281249999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8891247325473361
          entropy_coeff: 0.009999999999999998
          kl: 0.01034087901932828
          policy_loss: -0.081502808464898
          total_loss: 1.0523974395460554
          vf_explained_var: 0.08321187645196915
          vf_loss: 1.1513191238873535
    num_agent_steps_sampled: 84000
    num_agent_steps_trained: 84000
    num_steps_sampled: 84000
    num_steps_trained: 84000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,84,1728.11,84000,-43.008,-28.2,-54.1,430.31


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 85000
  custom_metrics: {}
  date: 2021-10-09_22-08-12
  done: false
  episode_len_mean: 433.55
  episode_media: {}
  episode_reward_max: -29.300000000000146
  episode_reward_mean: -43.332000000000356
  episode_reward_min: -54.1000000000005
  episodes_this_iter: 3
  episodes_total: 197
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14238281249999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8781017422676087
          entropy_coeff: 0.009999999999999998
          kl: 0.009669664204027543
          policy_loss: 0.009170784221755133
          total_loss: 1.1180203275548086
          vf_explained_var: -0.043351005762815475
          vf_loss: 1.1262537668148676
    num_agent_steps_sampled: 85000
    num_agent_steps_trained: 85000
    num_steps_sampled: 85000
    num_steps_trained: 85000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,85,1746.92,85000,-43.332,-29.3,-54.1,433.55


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 86000
  custom_metrics: {}
  date: 2021-10-09_22-08-31
  done: false
  episode_len_mean: 435.48
  episode_media: {}
  episode_reward_max: -30.700000000000166
  episode_reward_mean: -43.525000000000354
  episode_reward_min: -54.1000000000005
  episodes_this_iter: 2
  episodes_total: 199
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14238281249999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.879926500055525
          entropy_coeff: 0.009999999999999998
          kl: 0.009627042105578711
          policy_loss: 0.054338582936260434
          total_loss: 0.6161935482588079
          vf_explained_var: 0.2281894087791443
          vf_loss: 0.5792835054712163
    num_agent_steps_sampled: 86000
    num_agent_steps_trained: 86000
    num_steps_sampled: 86000
    num_steps_trained: 86000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,86,1765.84,86000,-43.525,-30.7,-54.1,435.48


Result for PPO_my_env_4d34e_00000:
  agent_timesteps_total: 87000
  custom_metrics: {}
  date: 2021-10-09_22-08-50
  done: false
  episode_len_mean: 437.85
  episode_media: {}
  episode_reward_max: -32.100000000000186
  episode_reward_mean: -43.76200000000036
  episode_reward_min: -54.1000000000005
  episodes_this_iter: 2
  episodes_total: 201
  experiment_id: 1667dda7b06542909a1096e12654b23d
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14238281249999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8463498089048598
          entropy_coeff: 0.009999999999999998
          kl: 0.012046026045272745
          policy_loss: -0.09771091938018799
          total_loss: 1.0890718814399507
          vf_explained_var: 0.3711315095424652
          vf_loss: 1.2035311463806364
    num_agent_steps_sampled: 87000
    num_agent_steps_trained: 87000
    num_steps_sampled: 87000
    num_steps_trained: 87000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_4d34e_00000,RUNNING,192.168.3.5:258828,87,1784.7,87000,-43.762,-32.1,-54.1,437.85
