In [1]:
import torch 
from torch import nn

import ray
from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

#from models import VisualEncoder
from train import *



In [2]:
class VisualEncoder(nn.Module):
    """Convolutional image encoder that outputs a flat feature vector.

    The network is five ``Conv2d`` layers, each followed by ``ReLU``, and a
    final ``Flatten``. The layers live in ``self.cnn`` in a fixed order so the
    parameter names (``cnn.0.*``, ``cnn.2.*``, ...) stay stable for checkpoints.
    """

    def __init__(self):
        super().__init__()

        # One row per convolution: (in_channels, out_channels, kernel_size, stride).
        conv_specs = (
            (3, 32, 8, 4),
            (32, 64, 4, 2),
            (64, 64, 3, 1),
            (64, 64, 3, 1),
            (64, 512, 2, 1),
        )

        stack = []
        for in_ch, out_ch, kernel, step in conv_specs:
            stack.append(nn.Conv2d(in_ch, out_ch, kernel_size=kernel, stride=step, padding=0))
            stack.append(nn.ReLU())
        # Collapse (channels, height, width) into a single feature axis.
        stack.append(nn.Flatten())

        self.cnn = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the convolutional stack and return the flattened features."""
        features = self.cnn(x)
        return features

In [3]:
class MyModelClass(TorchModelV2, nn.Module):
    """RLlib custom Torch model: pretrained CNN encoder plus linear policy and value heads.

    The encoder weights are loaded from a fixed checkpoint file when the model
    is constructed. ``forward`` returns the action logits and caches the value
    estimate, which ``value_function`` then returns.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        """Build the encoder and heads and load the pretrained encoder weights.

        Args:
            obs_space: Observation space, forwarded to ``TorchModelV2``.
            action_space: Action space; ``action_space.n`` sets the policy head width.
            num_outputs: Forwarded to ``TorchModelV2``.
            model_config: Forwarded to ``TorchModelV2``.
            name: Forwarded to ``TorchModelV2``.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)
        # Must equal the size of the flattened VisualEncoder output.
        features_dim = 512
        self.encoder = VisualEncoder()
        # map_location keeps loading working on machines without a GPU.
        self.encoder.load_state_dict(
            torch.load("/IGLU-Minecraft/models/AnnaCNN/encoder_weigths.pth", map_location=torch.device('cpu'))
        )
        self.action_head = nn.Linear(features_dim, action_space.n)
        self.value_head = nn.Linear(features_dim, 1)
        # Value estimate from the most recent forward() call; None until then.
        self.last_value = None

        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            self.encoder.cuda()
            self.action_head.cuda()
            self.value_head.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Compute action logits for a batch of observations.

        Args:
            input_dict: Mapping whose ``'obs'`` entry is the observation batch.
                NOTE(review): assumed to be (batch, height, width, channels)
                with values in 0..255 — confirm against the env wrappers.
            state: Passed through unchanged.
            seq_lens: Unused.

        Returns:
            ``(logits, state)``. The value estimate is stored in ``self.last_value``.
        """
        # Tensor.cuda()/.to() return a new tensor instead of moving in place, so the
        # result must be rebound (the previous bare `obs.cuda()` had no effect).
        # Use the device the weights actually live on rather than assuming CUDA.
        device = next(self.parameters()).device
        obs = input_dict['obs'].to(device)
        # Move the last axis to position 1 for Conv2d and scale to [0, 1].
        obs = obs.permute(0, 3, 1, 2).float() / 255.0

        features = self.encoder(obs)
        action = self.action_head(features)
        # (batch, 1) -> (batch,)
        self.last_value = self.value_head(features).squeeze(1)
        return action, state

    @override(TorchModelV2)
    def value_function(self):
        """Return the value estimate cached by the last ``forward`` call."""
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

In [4]:
# Register the model class under the name that the trainer config's
# "custom_model" entry refers to.
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [5]:
import os
# Expose only GPU 0 to this process, with devices numbered by PCI bus order.
# NOTE(review): presumably these must be set before CUDA is first initialised
# in the process to take effect — confirm nothing imported above touches CUDA.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0"

def env_creator(env_config):
    """Build the wrapped IGLU environment registered with RLlib.

    Args:
        env_config: Optional mapping of overrides (may be empty or ``None``).
            Recognised keys, both optional:
            ``"max_steps"`` — passed to ``gym.make`` (default ``1000``);
            ``"task_preset"`` — passed to ``TaskSet(preset=...)`` (default ``['C17']``).

    Returns:
        The ``IGLUSilentBuilder-v0`` env wrapped in ``PovOnlyWrapper`` and then
        ``IgluActionWrapper``.
    """
    # Previously env_config was ignored; fall back to the old hard-coded values
    # so existing configs behave exactly as before.
    env_config = env_config or {}
    max_steps = env_config.get("max_steps", 1000)
    task_preset = env_config.get("task_preset", ['C17'])

    env = gym.make('IGLUSilentBuilder-v0', max_steps=max_steps)
    env.update_taskset(TaskSet(preset=task_preset))
    env = PovOnlyWrapper(env)
    env = IgluActionWrapper(env)
    return env

from ray.tune.registry import register_env
# Make env_creator available under the name "my_env", which the trainer
# config's "env" entry refers to.
register_env("my_env", env_creator)

from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

In [None]:
from ray.tune.integration.wandb import WandbLogger

# Launch PPO training through Tune on the registered "my_env" environment,
# using the custom model registered as "my_torch_model" and logging through
# WandbLogger.
# NOTE(review): no `stop` criterion is passed, so presumably the run continues
# until it is interrupted manually — confirm this is intended.
tune.run(PPOTrainer, 
         config={
             "env": "my_env", 
             "framework": "torch",
             "num_gpus": 1,
             "num_workers": 1,
             # PPO hyperparameters.
             "sgd_minibatch_size": 256,
             "clip_param": 0.2,
             "entropy_coeff": 0.01,
             "lambda": 0.95,
             "train_batch_size": 1000,
             "model": {
                    # Specify our custom model from above.
                    "custom_model": "my_torch_model",
                    # Extra kwargs to be passed to your model's c'tor.
                    "custom_model_config": {},
              },
             # Settings read by WandbLogger: W&B project and run name.
             "logger_config": {
                  "wandb": {
                      "project": "IGLU-Minecraft",
                      "name": "PPO C17 pretrained (AnnaCNN)"
                  }
              }

        },
        loggers=[WandbLogger])

2021-09-18 16:06:42,958	INFO wandb.py:170 -- Already logged into W&B.
2021-09-18 16:06:42,975	ERROR syncer.py:72 -- Log sync requires rsync to be installed.


Trial name,status,loc
PPO_my_env_6a34a_00000,RUNNING,


[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)


[2m[36m(pid=168620)[0m 2021-09-18 16:06:46,535	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=168620)[0m 2021-09-18 16:06:46,535	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 1000
  custom_metrics: {}
  date: 2021-09-18_16-07-40
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -12.0
  episode_reward_mean: -12.0
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 1
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.3670889682239957
          entropy_coeff: 0.009999999999999998
          kl: 0.023380577262915696
          policy_loss: 0.05648387355936898
          total_loss: 0.18125435608542628
          vf_explained_var: 0.5717211365699768
          vf_loss: 0.1337652596541577
    num_agent_steps_sampled: 1000
    num_agent_steps_trained: 1000
    num_steps_sampled: 1000
    num_steps_trained: 1000
  iterations_since_restore: 1
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,1,47.9469,1000,-12,-12,-12,1000


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 2000
  custom_metrics: {}
  date: 2021-09-18_16-07-50
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -9.0
  episode_reward_mean: -10.5
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 2
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.304554573694865
          entropy_coeff: 0.009999999999999998
          kl: 0.00966196364176197
          policy_loss: 0.06383168548345566
          total_loss: 0.10338386793931326
          vf_explained_var: 0.6174427270889282
          vf_loss: 0.04969913876718945
    num_agent_steps_sampled: 2000
    num_agent_steps_trained: 2000
    num_steps_sampled: 2000
    num_steps_trained: 2000
  iterations_since_restore: 2
  node_ip: 192.168.3.5
  num_healthy_wo

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,2,58.3438,2000,-10.5,-9,-12,1000


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 3000
  custom_metrics: {}
  date: 2021-09-18_16-08-00
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -8.0
  episode_reward_mean: -9.666666666666666
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 3
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.1480003045664893
          entropy_coeff: 0.009999999999999998
          kl: 0.017902980215514692
          policy_loss: -0.025454937294125558
          total_loss: 0.054351726671059926
          vf_explained_var: 0.8110604286193848
          vf_loss: 0.08591577326878905
    num_agent_steps_sampled: 3000
    num_agent_steps_trained: 3000
    num_steps_sampled: 3000
    num_steps_trained: 3000
  iterations_since_restore: 3
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,3,68.3108,3000,-9.66667,-8,-12,1000


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 4000
  custom_metrics: {}
  date: 2021-09-18_16-08-10
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -8.0
  episode_reward_mean: -10.25
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 4
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.8596882647938198
          entropy_coeff: 0.009999999999999998
          kl: 0.011328951106469948
          policy_loss: 0.016690279915928842
          total_loss: 0.07627820964488718
          vf_explained_var: 0.6837578415870667
          vf_loss: 0.07478612440948686
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_steps_sampled: 4000
    num_steps_trained: 4000
  iterations_since_restore: 4
  node_ip: 192.168.3.5
  num_health

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,4,77.7526,4000,-10.25,-8,-12,1000




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 5000
  custom_metrics: {}
  date: 2021-09-18_16-08-53
  done: false
  episode_len_mean: 824.2
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -7.0
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 5
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 1.9570701294475132
          entropy_coeff: 0.009999999999999998
          kl: 0.03312427827807017
          policy_loss: -0.03640692879756292
          total_loss: 1.01073770125707
          vf_explained_var: 0.28020626306533813
          vf_loss: 1.0567780323326588
    num_agent_steps_sampled: 5000
    num_agent_steps_trained: 5000
    num_steps_sampled: 5000
    num_steps_trained: 5000
  iterations_since_restore: 5
  node_ip: 192.168.3.5
  num_healthy_worker

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,5,121.21,5000,-7,6,-12,824.2


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 6000
  custom_metrics: {}
  date: 2021-09-18_16-09-03
  done: false
  episode_len_mean: 853.5
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -7.833333333333333
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 6
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.9388237396876018
          entropy_coeff: 0.009999999999999998
          kl: 0.01243431468141068
          policy_loss: -0.008174773181478183
          total_loss: 0.12707871823675101
          vf_explained_var: 0.5083362460136414
          vf_loss: 0.1490462856988112
    num_agent_steps_sampled: 6000
    num_agent_steps_trained: 6000
    num_steps_sampled: 6000
    num_steps_trained: 6000
  iterations_since_restore: 6
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,6,130.868,6000,-7.83333,6,-12,853.5


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 7000
  custom_metrics: {}
  date: 2021-09-18_16-09-13
  done: false
  episode_len_mean: 874.4285714285714
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -7.428571428571429
  episode_reward_min: -12.0
  episodes_this_iter: 1
  episodes_total: 7
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.056898648209042
          entropy_coeff: 0.009999999999999998
          kl: 0.01353828184062584
          policy_loss: -0.03745961909492811
          total_loss: 0.24459801192084948
          vf_explained_var: 0.6537075042724609
          vf_loss: 0.2965343894229995
    num_agent_steps_sampled: 7000
    num_agent_steps_trained: 7000
    num_steps_sampled: 7000
    num_steps_trained: 7000
  iterations_since_restore: 7
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,7,140.513,7000,-7.42857,6,-12,874.429




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 8000
  custom_metrics: {}
  date: 2021-09-18_16-10-03
  done: false
  episode_len_mean: 803.1111111111111
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -7.0
  episode_reward_min: -17.0
  episodes_this_iter: 2
  episodes_total: 9
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.6656837317678663
          entropy_coeff: 0.009999999999999998
          kl: 0.009691746205677513
          policy_loss: -0.04750806490580241
          total_loss: 0.7492489092051983
          vf_explained_var: 0.4128987193107605
          vf_loss: 0.80905252252188
    num_agent_steps_sampled: 8000
    num_agent_steps_trained: 8000
    num_steps_sampled: 8000
    num_steps_trained: 8000
  iterations_since_restore: 8
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,8,191.006,8000,-7,6,-17,803.111


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 9000
  custom_metrics: {}
  date: 2021-09-18_16-10-15
  done: false
  episode_len_mean: 822.8
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -7.1
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 10
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.9088264160686068
          entropy_coeff: 0.009999999999999998
          kl: 0.0077701195581519125
          policy_loss: 0.04112128756112522
          total_loss: 0.09781551866067781
          vf_explained_var: 0.3502279818058014
          vf_loss: 0.07228594259358942
    num_agent_steps_sampled: 9000
    num_agent_steps_trained: 9000
    num_steps_sampled: 9000
    num_steps_trained: 9000
  iterations_since_restore: 9
  node_ip: 192.168.3.5


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,9,202.618,9000,-7.1,6,-17,822.8


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 10000
  custom_metrics: {}
  date: 2021-09-18_16-10-24
  done: false
  episode_len_mean: 838.9090909090909
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -6.7272727272727275
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 11
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.971891744931539
          entropy_coeff: 0.009999999999999998
          kl: 0.011741300626494599
          policy_loss: -0.0011010727948612636
          total_loss: 0.12622878899176915
          vf_explained_var: 0.61668461561203
          vf_loss: 0.14176519563835527
    num_agent_steps_sampled: 10000
    num_agent_steps_trained: 10000
    num_steps_sampled: 10000
    num_steps_trained: 10000
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,10,212.283,10000,-6.72727,6,-17,838.909




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 11000
  custom_metrics: {}
  date: 2021-09-18_16-11-14
  done: false
  episode_len_mean: 793.0
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -5.461538461538462
  episode_reward_min: -17.0
  episodes_this_iter: 2
  episodes_total: 13
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.9707191096411811
          entropy_coeff: 0.009999999999999998
          kl: 0.011683577840766821
          policy_loss: -0.053177201996246974
          total_loss: 0.583914022313224
          vf_explained_var: 0.5343300700187683
          vf_loss: 0.6515407888011799
    num_agent_steps_sampled: 11000
    num_agent_steps_trained: 11000
    num_steps_sampled: 11000
    num_steps_trained: 11000
  iterations_since_restore: 11
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,11,262.075,11000,-5.46154,6,-17,793


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 12000
  custom_metrics: {}
  date: 2021-09-18_16-11-26
  done: false
  episode_len_mean: 807.7857142857143
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -5.5
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 14
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.1383425328466625
          entropy_coeff: 0.009999999999999998
          kl: 0.01069537233033954
          policy_loss: 0.06140308487746451
          total_loss: 0.15643544323328468
          vf_explained_var: 0.0781848207116127
          vf_loss: 0.11160286520090368
    num_agent_steps_sampled: 12000
    num_agent_steps_trained: 12000
    num_steps_sampled: 12000
    num_steps_trained: 12000
  iterations_since_restore: 12
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,12,273.507,12000,-5.5,6,-17,807.786


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 13000
  custom_metrics: {}
  date: 2021-09-18_16-11-35
  done: false
  episode_len_mean: 820.6
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -5.0
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 15
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.045928826597002
          entropy_coeff: 0.009999999999999998
          kl: 0.013125653957697223
          policy_loss: 0.029312341494692695
          total_loss: 0.13808400246004263
          vf_explained_var: 0.24759584665298462
          vf_loss: 0.12332440658679439
    num_agent_steps_sampled: 13000
    num_agent_steps_trained: 13000
    num_steps_sampled: 13000
    num_steps_trained: 13000
  iterations_since_restore: 13
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,13,282.534,13000,-5,6,-17,820.6


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 14000
  custom_metrics: {}
  date: 2021-09-18_16-11-44
  done: false
  episode_len_mean: 831.8125
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -5.0625
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 16
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.25733843114641
          entropy_coeff: 0.009999999999999998
          kl: 0.011378293740579753
          policy_loss: -0.06985723161035114
          total_loss: -0.025295359227392407
          vf_explained_var: -0.3458927571773529
          vf_loss: 0.062015028256509036
    num_agent_steps_sampled: 14000
    num_agent_steps_trained: 14000
    num_steps_sampled: 14000
    num_steps_trained: 14000
  iterations_since_restore: 14
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,14,291.632,14000,-5.0625,6,-17,831.812


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 15000
  custom_metrics: {}
  date: 2021-09-18_16-11-53
  done: false
  episode_len_mean: 841.7058823529412
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -4.764705882352941
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 17
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.508435853322347
          entropy_coeff: 0.009999999999999998
          kl: 0.009501845399831666
          policy_loss: -0.03988076539503203
          total_loss: -0.03233566027548578
          vf_explained_var: -0.14936764538288116
          vf_loss: 0.028353631724086073
    num_agent_steps_sampled: 15000
    num_agent_steps_trained: 15000
    num_steps_sampled: 15000
    num_steps_trained: 15000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,15,300.903,15000,-4.76471,6,-17,841.706


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2021-09-18_16-12-03
  done: false
  episode_len_mean: 850.5
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -4.5
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 18
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.39866370095147
          entropy_coeff: 0.009999999999999998
          kl: 0.013497753540101224
          policy_loss: -0.06663913782685996
          total_loss: -0.061146514924863976
          vf_explained_var: 0.37064480781555176
          vf_loss: 0.023405268902166022
    num_agent_steps_sampled: 16000
    num_agent_steps_trained: 16000
    num_steps_sampled: 16000
    num_steps_trained: 16000
  iterations_since_restore: 16
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,16,310.347,16000,-4.5,6,-17,850.5


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 17000
  custom_metrics: {}
  date: 2021-09-18_16-12-12
  done: false
  episode_len_mean: 858.3684210526316
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -4.2631578947368425
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 19
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.60370774269104
          entropy_coeff: 0.009999999999999998
          kl: 0.008118200573161694
          policy_loss: -0.1102167909240557
          total_loss: -0.12192499397529497
          vf_explained_var: -0.5022750496864319
          vf_loss: 0.010675684072905117
    num_agent_steps_sampled: 17000
    num_agent_steps_trained: 17000
    num_steps_sampled: 17000
    num_steps_trained: 17000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,17,320.13,17000,-4.26316,6,-17,858.368


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 18000
  custom_metrics: {}
  date: 2021-09-18_16-12-23
  done: false
  episode_len_mean: 865.45
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -4.05
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 20
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.6035052193535697
          entropy_coeff: 0.009999999999999998
          kl: 0.006822143986906244
          policy_loss: -0.03238322544429037
          total_loss: -0.04681883574359947
          vf_explained_var: -0.26939353346824646
          vf_loss: 0.00852947877202597
    num_agent_steps_sampled: 18000
    num_agent_steps_trained: 18000
    num_steps_sampled: 18000
    num_steps_trained: 18000
  iterations_since_restore: 18
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,18,330.397,18000,-4.05,6,-17,865.45


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 19000
  custom_metrics: {}
  date: 2021-09-18_16-12-33
  done: false
  episode_len_mean: 871.8571428571429
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -3.857142857142857
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 21
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.5005279302597048
          entropy_coeff: 0.009999999999999998
          kl: 0.0148615653162509
          policy_loss: -0.2157234638929367
          total_loss: -0.21506182932191426
          vf_explained_var: -0.3178735077381134
          vf_loss: 0.01897920976496405
    num_agent_steps_sampled: 19000
    num_agent_steps_trained: 19000
    num_steps_sampled: 19000
    num_steps_trained: 19000
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,19,340.393,19000,-3.85714,6,-17,871.857


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-09-18_16-12-42
  done: false
  episode_len_mean: 877.6818181818181
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -3.6818181818181817
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 22
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.4068985833062064
          entropy_coeff: 0.009999999999999998
          kl: 0.03700042223651964
          policy_loss: 0.0678404011660152
          total_loss: 0.0648525142007404
          vf_explained_var: 0.2182598114013672
          vf_loss: 0.004430911392490897
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_steps_sampled: 20000
    num_steps_trained: 20000
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,20,349.737,20000,-3.68182,6,-17,877.682


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 21000
  custom_metrics: {}
  date: 2021-09-18_16-12-52
  done: false
  episode_len_mean: 883.0
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -3.5217391304347827
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 23
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.5570175674226547
          entropy_coeff: 0.009999999999999998
          kl: 0.009949949233425166
          policy_loss: -0.03594713871263795
          total_loss: -0.050255102167526884
          vf_explained_var: -0.9111191630363464
          vf_loss: 0.00454599377569846
    num_agent_steps_sampled: 21000
    num_agent_steps_trained: 21000
    num_steps_sampled: 21000
    num_steps_trained: 21000
  iterations_since_restore: 21

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,21,359.5,21000,-3.52174,6,-17,883


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 22000
  custom_metrics: {}
  date: 2021-09-18_16-13-01
  done: false
  episode_len_mean: 887.875
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -3.375
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 24
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.413970539304945
          entropy_coeff: 0.009999999999999998
          kl: 0.009946620388181993
          policy_loss: -0.031489395846923195
          total_loss: -0.03735377879606353
          vf_explained_var: 0.23393145203590393
          vf_loss: 0.011561354847314458
    num_agent_steps_sampled: 22000
    num_agent_steps_trained: 22000
    num_steps_sampled: 22000
    num_steps_trained: 22000
  iterations_since_restore: 22
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,22,368.947,22000,-3.375,6,-17,887.875


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 23000
  custom_metrics: {}
  date: 2021-09-18_16-13-11
  done: false
  episode_len_mean: 892.36
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -3.24
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 25
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.5622371620602076
          entropy_coeff: 0.009999999999999998
          kl: 0.007846308017997005
          policy_loss: -0.0954566051148706
          total_loss: -0.11288728693293201
          vf_explained_var: -1.0
          vf_loss: 0.00289543038363465
    num_agent_steps_sampled: 23000
    num_agent_steps_trained: 23000
    num_steps_sampled: 23000
    num_steps_trained: 23000
  iterations_since_restore: 23
  node_ip: 192.168.3.5
  num_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,23,378.38,23000,-3.24,6,-17,892.36


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 24000
  custom_metrics: {}
  date: 2021-09-18_16-13-21
  done: false
  episode_len_mean: 896.5
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -3.1153846153846154
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 26
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.5388310564888847
          entropy_coeff: 0.009999999999999998
          kl: 0.007913037596885367
          policy_loss: -0.08542121628092395
          total_loss: -0.10038222935464647
          vf_explained_var: -0.8031092286109924
          vf_loss: 0.005085994591677768
    num_agent_steps_sampled: 24000
    num_agent_steps_trained: 24000
    num_steps_sampled: 24000
    num_steps_trained: 24000
  iterations_since_restore: 24

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,24,388.162,24000,-3.11538,6,-17,896.5


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 25000
  custom_metrics: {}
  date: 2021-09-18_16-13-30
  done: false
  episode_len_mean: 900.3333333333334
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -3.0
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 27
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.5264961216184827
          entropy_coeff: 0.009999999999999998
          kl: 0.0036638973529518576
          policy_loss: -0.13386898396743668
          total_loss: -0.15074449338846738
          vf_explained_var: -0.6281957626342773
          vf_loss: 0.005916321626864373
    num_agent_steps_sampled: 25000
    num_agent_steps_trained: 25000
    num_steps_sampled: 25000
    num_steps_trained: 25000
  iterations_since_restore: 25
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,25,397.83,25000,-3,6,-17,900.333


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 26000
  custom_metrics: {}
  date: 2021-09-18_16-13-40
  done: false
  episode_len_mean: 903.8928571428571
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -2.857142857142857
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 28
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.467068929142422
          entropy_coeff: 0.009999999999999998
          kl: 0.007571735254300351
          policy_loss: 0.1309463522500462
          total_loss: 0.11203731422622999
          vf_explained_var: 0.3144274353981018
          vf_loss: 0.003206189464415527
    num_agent_steps_sampled: 26000
    num_agent_steps_trained: 26000
    num_steps_sampled: 26000
    num_steps_trained: 26000
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,26,407.555,26000,-2.85714,6,-17,903.893


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 27000
  custom_metrics: {}
  date: 2021-09-18_16-13-49
  done: false
  episode_len_mean: 907.2068965517242
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -2.7241379310344827
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 29
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.484879144032796
          entropy_coeff: 0.009999999999999998
          kl: 0.019831591988369486
          policy_loss: 0.1188467585378223
          total_loss: 0.1046080768108368
          vf_explained_var: -0.14281293749809265
          vf_loss: 0.0039169466666256385
    num_agent_steps_sampled: 27000
    num_agent_steps_trained: 27000
    num_steps_sampled: 27000
    num_steps_trained: 27000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,27,416.937,27000,-2.72414,6,-17,907.207


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 28000
  custom_metrics: {}
  date: 2021-09-18_16-13-59
  done: false
  episode_len_mean: 910.3
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -2.6333333333333333
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 30
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.53340417014228
          entropy_coeff: 0.009999999999999998
          kl: 0.013735513012403564
          policy_loss: -0.014920179090566105
          total_loss: -0.032245269334978524
          vf_explained_var: 0.25545358657836914
          vf_loss: 0.0033732176111597153
    num_agent_steps_sampled: 28000
    num_agent_steps_trained: 28000
    num_steps_sampled: 28000
    num_steps_trained: 28000
  iterations_since_restore: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,28,426.227,28000,-2.63333,6,-17,910.3


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 29000
  custom_metrics: {}
  date: 2021-09-18_16-14-08
  done: false
  episode_len_mean: 913.1935483870968
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -2.5483870967741935
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 31
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.520556020736694
          entropy_coeff: 0.009999999999999998
          kl: 0.014283085154531111
          policy_loss: 0.054686250620418125
          total_loss: 0.0394025883740849
          vf_explained_var: -0.227853924036026
          vf_loss: 0.005101357604790893
    num_agent_steps_sampled: 29000
    num_agent_steps_trained: 29000
    num_steps_sampled: 29000
    num_steps_trained: 29000
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,29,435.398,29000,-2.54839,6,-17,913.194


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 30000
  custom_metrics: {}
  date: 2021-09-18_16-14-17
  done: false
  episode_len_mean: 915.90625
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -2.46875
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 32
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.603881984286838
          entropy_coeff: 0.009999999999999998
          kl: 0.008884891117715111
          policy_loss: -0.04468931257724762
          total_loss: -0.06572166867554188
          vf_explained_var: -1.0
          vf_loss: 0.0020078121141220134
    num_agent_steps_sampled: 30000
    num_agent_steps_trained: 30000
    num_steps_sampled: 30000
    num_steps_trained: 30000
  iterations_since_restore: 30
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,30,444.969,30000,-2.46875,6,-17,915.906


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 31000
  custom_metrics: {}
  date: 2021-09-18_16-14-26
  done: false
  episode_len_mean: 918.4545454545455
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -2.393939393939394
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 33
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.5448914660347834
          entropy_coeff: 0.009999999999999998
          kl: 0.011722076049035815
          policy_loss: -0.17997491227255927
          total_loss: -0.1998554011185964
          vf_explained_var: -0.47906172275543213
          vf_loss: 0.0016122254362724358
    num_agent_steps_sampled: 31000
    num_agent_steps_trained: 31000
    num_steps_sampled: 31000
    num_steps_trained: 31000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,31,453.552,31000,-2.39394,6,-17,918.455


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 32000
  custom_metrics: {}
  date: 2021-09-18_16-14-35
  done: false
  episode_len_mean: 920.8529411764706
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -2.323529411764706
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 34
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.5109880844751995
          entropy_coeff: 0.009999999999999998
          kl: 0.011955855112892206
          policy_loss: 0.08620979251960913
          total_loss: 0.06683593235082097
          vf_explained_var: 0.13686338067054749
          vf_loss: 0.001700916242003182
    num_agent_steps_sampled: 32000
    num_agent_steps_trained: 32000
    num_steps_sampled: 32000
    num_steps_trained: 32000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,32,462.439,32000,-2.32353,6,-17,920.853


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 33000
  custom_metrics: {}
  date: 2021-09-18_16-14-44
  done: false
  episode_len_mean: 923.1142857142858
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -2.257142857142857
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 35
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.557257252269321
          entropy_coeff: 0.009999999999999998
          kl: 0.008146977846499539
          policy_loss: 0.006548690547545751
          total_loss: -0.01436559334397316
          vf_explained_var: -0.4181974232196808
          vf_loss: 0.0019086826989627701
    num_agent_steps_sampled: 33000
    num_agent_steps_trained: 33000
    num_steps_sampled: 33000
    num_steps_trained: 33000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,33,471.482,33000,-2.25714,6,-17,923.114


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 34000
  custom_metrics: {}
  date: 2021-09-18_16-14-53
  done: false
  episode_len_mean: 925.25
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -2.1944444444444446
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 36
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.548848334948222
          entropy_coeff: 0.009999999999999998
          kl: 0.0076119084525106
          policy_loss: -0.0689220764570766
          total_loss: -0.09070639262596766
          vf_explained_var: -0.3235972225666046
          vf_loss: 0.0011351510149122785
    num_agent_steps_sampled: 34000
    num_agent_steps_trained: 34000
    num_steps_sampled: 34000
    num_steps_trained: 34000
  iterations_since_restore: 34
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,34,479.985,34000,-2.19444,6,-17,925.25


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 35000
  custom_metrics: {}
  date: 2021-09-18_16-15-01
  done: false
  episode_len_mean: 927.2702702702703
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -2.135135135135135
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 37
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.572509010632833
          entropy_coeff: 0.009999999999999998
          kl: 0.011177547066836012
          policy_loss: -0.07413210272789002
          total_loss: -0.09513660023609798
          vf_explained_var: -0.3870854675769806
          vf_loss: 0.0009481699497781745
    num_agent_steps_sampled: 35000
    num_agent_steps_trained: 35000
    num_steps_sampled: 35000
    num_steps_trained: 35000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,35,488.382,35000,-2.13514,6,-17,927.27


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 36000
  custom_metrics: {}
  date: 2021-09-18_16-15-09
  done: false
  episode_len_mean: 929.1842105263158
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -2.0789473684210527
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 38
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.598424869113498
          entropy_coeff: 0.009999999999999998
          kl: 0.009191391169818727
          policy_loss: -0.06474812510940764
          total_loss: -0.08685214759574997
          vf_explained_var: -0.6681010127067566
          vf_loss: 0.000778131173683505
    num_agent_steps_sampled: 36000
    num_agent_steps_trained: 36000
    num_steps_sampled: 36000
    num_steps_trained: 36000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,36,496.673,36000,-2.07895,6,-17,929.184


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 37000
  custom_metrics: {}
  date: 2021-09-18_16-15-17
  done: false
  episode_len_mean: 931.0
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -2.0256410256410255
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 39
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.572610428598192
          entropy_coeff: 0.009999999999999998
          kl: 0.008653683319882808
          policy_loss: -0.04221520647406578
          total_loss: -0.06448948333660762
          vf_explained_var: -0.5766053199768066
          vf_loss: 0.000531206997483322
    num_agent_steps_sampled: 37000
    num_agent_steps_trained: 37000
    num_steps_sampled: 37000
    num_steps_trained: 37000
  iterations_since_restore: 37


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,37,504.711,37000,-2.02564,6,-17,931


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 38000
  custom_metrics: {}
  date: 2021-09-18_16-15-26
  done: false
  episode_len_mean: 932.725
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.975
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 40
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.482022409968906
          entropy_coeff: 0.009999999999999998
          kl: 0.008207910104156902
          policy_loss: -0.06294444708360566
          total_loss: -0.08355077455441157
          vf_explained_var: -0.6399221420288086
          vf_loss: 0.0014437250553682032
    num_agent_steps_sampled: 38000
    num_agent_steps_trained: 38000
    num_steps_sampled: 38000
    num_steps_trained: 38000
  iterations_since_restore: 38
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,38,513.015,38000,-1.975,6,-17,932.725


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 39000
  custom_metrics: {}
  date: 2021-09-18_16-15-34
  done: false
  episode_len_mean: 934.3658536585366
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.9268292682926829
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 41
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.4406596395704483
          entropy_coeff: 0.009999999999999998
          kl: 0.00898419153487071
          policy_loss: -0.04131204151651925
          total_loss: -0.06128625062604745
          vf_explained_var: -0.9638320803642273
          vf_loss: 0.0014002223939718937
    num_agent_steps_sampled: 39000
    num_agent_steps_trained: 39000
    num_steps_sampled: 39000
    num_steps_trained: 39000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,39,521.332,39000,-1.92683,6,-17,934.366


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2021-09-18_16-15-43
  done: false
  episode_len_mean: 935.9285714285714
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.880952380952381
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 42
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.416805322964986
          entropy_coeff: 0.009999999999999998
          kl: 0.00983859686516229
          policy_loss: -0.04132922262781196
          total_loss: -0.05852732178237703
          vf_explained_var: -0.7327122092247009
          vf_loss: 0.0036494270637275703
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_steps_sampled: 40000
    num_steps_trained: 40000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,40,530.066,40000,-1.88095,6,-17,935.929




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 41000
  custom_metrics: {}
  date: 2021-09-18_16-16-09
  done: false
  episode_len_mean: 934.2325581395348
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.8372093023255813
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 43
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.4678532573911878
          entropy_coeff: 0.009999999999999998
          kl: 0.00977669746956888
          policy_loss: -0.04623100811408626
          total_loss: -0.06342430042309893
          vf_explained_var: -0.8790987730026245
          vf_loss: 0.004185604968289327
    num_agent_steps_sampled: 41000
    num_agent_steps_trained: 41000
    num_steps_sampled: 41000
    num_steps_trained: 41000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,41,556.628,41000,-1.83721,6,-17,934.233


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 42000
  custom_metrics: {}
  date: 2021-09-18_16-16-18
  done: false
  episode_len_mean: 935.7272727272727
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.7954545454545454
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 44
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.4775991413328384
          entropy_coeff: 0.009999999999999998
          kl: 0.009978224203016842
          policy_loss: -0.054001608656512365
          total_loss: -0.0736454498850637
          vf_explained_var: -0.7468075156211853
          vf_loss: 0.0017645010548423873
    num_agent_steps_sampled: 42000
    num_agent_steps_trained: 42000
    num_steps_sampled: 42000
    num_steps_trained: 42000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,42,565.416,42000,-1.79545,6,-17,935.727


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 43000
  custom_metrics: {}
  date: 2021-09-18_16-16-27
  done: false
  episode_len_mean: 937.1555555555556
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.7555555555555555
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 45
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3562517219119603
          entropy_coeff: 0.009999999999999998
          kl: 0.013491703683417993
          policy_loss: -0.07062424841440386
          total_loss: -0.08448361615753835
          vf_explained_var: -0.3154928684234619
          vf_loss: 0.005149699425480018
    num_agent_steps_sampled: 43000
    num_agent_steps_trained: 43000
    num_steps_sampled: 43000
    num_steps_trained: 43000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,43,574.591,43000,-1.75556,6,-17,937.156


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 44000
  custom_metrics: {}
  date: 2021-09-18_16-16-36
  done: false
  episode_len_mean: 938.5217391304348
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.7173913043478262
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 46
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.498113160663181
          entropy_coeff: 0.009999999999999998
          kl: 0.007620885996063434
          policy_loss: -0.03625733604033788
          total_loss: -0.05800971653726366
          vf_explained_var: -0.8936265707015991
          vf_loss: 0.0006567016898164486
    num_agent_steps_sampled: 44000
    num_agent_steps_trained: 44000
    num_steps_sampled: 44000
    num_steps_trained: 44000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,44,582.763,44000,-1.71739,6,-17,938.522


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 45000
  custom_metrics: {}
  date: 2021-09-18_16-16-45
  done: false
  episode_len_mean: 939.8297872340426
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.6808510638297873
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 47
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3478867954678004
          entropy_coeff: 0.009999999999999998
          kl: 0.008922418692231057
          policy_loss: 0.06431468731413285
          total_loss: 0.04592250229583846
          vf_explained_var: -0.6231013536453247
          vf_loss: 0.0020753667272704964
    num_agent_steps_sampled: 45000
    num_agent_steps_trained: 45000
    num_steps_sampled: 45000
    num_steps_trained: 45000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,45,592.15,45000,-1.68085,6,-17,939.83


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 46000
  custom_metrics: {}
  date: 2021-09-18_16-16-54
  done: false
  episode_len_mean: 941.0833333333334
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.6458333333333333
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 48
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3808150953716702
          entropy_coeff: 0.009999999999999998
          kl: 0.009281638785061316
          policy_loss: -0.06242520846426487
          total_loss: -0.08144021845526166
          vf_explained_var: -0.5780569911003113
          vf_loss: 0.0016605900524559224
    num_agent_steps_sampled: 46000
    num_agent_steps_trained: 46000
    num_steps_sampled: 46000
    num_steps_trained: 46000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,46,600.982,46000,-1.64583,6,-17,941.083


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 47000
  custom_metrics: {}
  date: 2021-09-18_16-17-03
  done: false
  episode_len_mean: 942.2857142857143
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.6122448979591837
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 49
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3599317603641086
          entropy_coeff: 0.009999999999999998
          kl: 0.00706868143227217
          policy_loss: -0.02746694100399812
          total_loss: -0.04710187104841073
          vf_explained_var: -0.9661502242088318
          vf_loss: 0.0015787058923807408
    num_agent_steps_sampled: 47000
    num_agent_steps_trained: 47000
    num_steps_sampled: 47000
    num_steps_trained: 47000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,47,609.874,47000,-1.61224,6,-17,942.286




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 48000
  custom_metrics: {}
  date: 2021-09-18_16-17-58
  done: false
  episode_len_mean: 933.3529411764706
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.4901960784313726
  episode_reward_min: -17.0
  episodes_this_iter: 2
  episodes_total: 51
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.320815255906847
          entropy_coeff: 0.009999999999999998
          kl: 0.0072998620462356965
          policy_loss: -0.047786881691879696
          total_loss: 0.0023759115487337113
          vf_explained_var: -0.17321932315826416
          vf_loss: 0.07090724084991962
    num_agent_steps_sampled: 48000
    num_agent_steps_trained: 48000
    num_steps_sampled: 48000
    num_steps_trained: 48000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,48,665.375,48000,-1.4902,6,-17,933.353


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 49000
  custom_metrics: {}
  date: 2021-09-18_16-18-08
  done: false
  episode_len_mean: 934.6346153846154
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.4615384615384615
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 52
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.4784562508265178
          entropy_coeff: 0.009999999999999998
          kl: 0.007655309651024409
          policy_loss: 0.007878005918529298
          total_loss: -0.013123071690400442
          vf_explained_var: -0.30670133233070374
          vf_loss: 0.0011998180893392095
    num_agent_steps_sampled: 49000
    num_agent_steps_trained: 49000
    num_steps_sampled: 49000
    num_steps_trained: 49000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,49,675.098,49000,-1.46154,6,-17,934.635


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 50000
  custom_metrics: {}
  date: 2021-09-18_16-18-17
  done: false
  episode_len_mean: 935.8679245283018
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.4339622641509433
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 53
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.356647112634447
          entropy_coeff: 0.009999999999999998
          kl: 0.01010687954799404
          policy_loss: -0.016304176011019283
          total_loss: -0.0347729154345062
          vf_explained_var: -0.8308650255203247
          vf_loss: 0.0016866594928109811
    num_agent_steps_sampled: 50000
    num_agent_steps_trained: 50000
    num_steps_sampled: 50000
    num_steps_trained: 50000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,50,684.562,50000,-1.43396,6,-17,935.868


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 51000
  custom_metrics: {}
  date: 2021-09-18_16-18-26
  done: false
  episode_len_mean: 937.0555555555555
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.4074074074074074
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 54
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.5398691177368162
          entropy_coeff: 0.009999999999999998
          kl: 0.006295328678075821
          policy_loss: -0.00610105622973707
          total_loss: -0.029099483456876542
          vf_explained_var: 0.07766468077898026
          vf_loss: 0.0002755904697551159
    num_agent_steps_sampled: 51000
    num_agent_steps_trained: 51000
    num_steps_sampled: 51000
    num_steps_trained: 51000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,51,692.998,51000,-1.40741,6,-17,937.056


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 52000
  custom_metrics: {}
  date: 2021-09-18_16-18-35
  done: false
  episode_len_mean: 938.2
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.3818181818181818
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 55
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.475806548860338
          entropy_coeff: 0.009999999999999998
          kl: 0.005507781526225841
          policy_loss: -0.06082903082585997
          total_loss: -0.0830688984443744
          vf_explained_var: -0.5110281109809875
          vf_loss: 0.0006593216812689207
    num_agent_steps_sampled: 52000
    num_agent_steps_trained: 52000
    num_steps_sampled: 52000
    num_steps_trained: 52000
  iterations_since_restore: 52


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,52,701.978,52000,-1.38182,6,-17,938.2


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 53000
  custom_metrics: {}
  date: 2021-09-18_16-18-45
  done: false
  episode_len_mean: 939.3035714285714
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.3571428571428572
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 56
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.4141886552174885
          entropy_coeff: 0.009999999999999998
          kl: 0.008150332949203214
          policy_loss: -0.08046758568121327
          total_loss: -0.10067920581334167
          vf_explained_var: -0.7163428664207458
          vf_loss: 0.001179528148430917
    num_agent_steps_sampled: 53000
    num_agent_steps_trained: 53000
    num_steps_sampled: 53000
    num_steps_trained: 53000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,53,711.875,53000,-1.35714,6,-17,939.304


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 54000
  custom_metrics: {}
  date: 2021-09-18_16-18-54
  done: false
  episode_len_mean: 940.3684210526316
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.3333333333333333
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 57
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.500933270984226
          entropy_coeff: 0.009999999999999998
          kl: 0.0070091107468568635
          policy_loss: -0.037294209375977515
          total_loss: -0.05950250451763471
          vf_explained_var: -0.07140655070543289
          vf_loss: 0.00043546247523206855
    num_agent_steps_sampled: 54000
    num_agent_steps_trained: 54000
    num_steps_sampled: 54000
    num_steps_trained: 54000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,54,720.721,54000,-1.33333,6,-17,940.368


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 55000
  custom_metrics: {}
  date: 2021-09-18_16-19-03
  done: false
  episode_len_mean: 941.3965517241379
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.3103448275862069
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 58
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3846836434470284
          entropy_coeff: 0.009999999999999998
          kl: 0.014819260580153454
          policy_loss: 0.04202855825424194
          total_loss: 0.03340553790330887
          vf_explained_var: 0.4449535012245178
          vf_loss: 0.01022231237600661
    num_agent_steps_sampled: 55000
    num_agent_steps_trained: 55000
    num_steps_sampled: 55000
    num_steps_trained: 55000
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,55,730.18,55000,-1.31034,6,-17,941.397


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 56000
  custom_metrics: {}
  date: 2021-09-18_16-19-12
  done: false
  episode_len_mean: 942.3898305084746
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.2881355932203389
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 59
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.494808962610033
          entropy_coeff: 0.009999999999999998
          kl: 0.009236251177623762
          policy_loss: -0.12348763959275352
          total_loss: -0.14420984946191312
          vf_explained_var: -0.4008418321609497
          vf_loss: 0.0011086431433795952
    num_agent_steps_sampled: 56000
    num_agent_steps_trained: 56000
    num_steps_sampled: 56000
    num_steps_trained: 56000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,56,739.354,56000,-1.28814,6,-17,942.39


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 57000
  custom_metrics: {}
  date: 2021-09-18_16-19-23
  done: false
  episode_len_mean: 943.35
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.2666666666666666
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 60
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3583421309789023
          entropy_coeff: 0.009999999999999998
          kl: 0.013568814135639679
          policy_loss: -0.04624882864041461
          total_loss: -0.0642317364199294
          vf_explained_var: 0.07156095653772354
          vf_loss: 0.0010210372332949192
    num_agent_steps_sampled: 57000
    num_agent_steps_trained: 57000
    num_steps_sampled: 57000
    num_steps_trained: 57000
  iterations_since_restore: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,57,749.568,57000,-1.26667,6,-17,943.35


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 58000
  custom_metrics: {}
  date: 2021-09-18_16-19-33
  done: false
  episode_len_mean: 944.2786885245902
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.2459016393442623
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 61
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0904591745800443
          entropy_coeff: 0.009999999999999998
          kl: 0.009728373616449727
          policy_loss: 0.02931175749335024
          total_loss: 0.013241404874457253
          vf_explained_var: -0.8708957433700562
          vf_loss: 0.0015509138685754604
    num_agent_steps_sampled: 58000
    num_agent_steps_trained: 58000
    num_steps_sampled: 58000
    num_steps_trained: 58000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,58,759.838,58000,-1.2459,6,-17,944.279


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 59000
  custom_metrics: {}
  date: 2021-09-18_16-19-44
  done: false
  episode_len_mean: 945.1774193548387
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.2258064516129032
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 62
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2167019208272296
          entropy_coeff: 0.009999999999999998
          kl: 0.008282511220372522
          policy_loss: -0.009000593465235499
          total_loss: -0.024871079706483418
          vf_explained_var: -0.060268767178058624
          vf_loss: 0.0035011868691071867
    num_agent_steps_sampled: 59000
    num_agent_steps_trained: 59000
    num_steps_sampled: 59000
    num_steps_trained: 59000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,59,770.512,59000,-1.22581,6,-17,945.177


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2021-09-18_16-19-53
  done: false
  episode_len_mean: 946.047619047619
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.2063492063492063
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 63
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.4782678365707396
          entropy_coeff: 0.009999999999999998
          kl: 0.014442453751581317
          policy_loss: 0.029237177222967148
          total_loss: 0.010512602494822608
          vf_explained_var: -1.0
          vf_loss: 0.0011837756544183422
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_steps_sampled: 60000
    num_steps_trained: 60000
  iterations_since_restore: 60
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,60,779.951,60000,-1.20635,6,-17,946.048


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 61000
  custom_metrics: {}
  date: 2021-09-18_16-20-02
  done: false
  episode_len_mean: 946.890625
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.1875
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 64
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.499061197704739
          entropy_coeff: 0.009999999999999998
          kl: 0.010512339026236835
          policy_loss: -0.0332981423371368
          total_loss: -0.05263775148325496
          vf_explained_var: -0.2780475914478302
          vf_loss: 0.0021030884240947974
    num_agent_steps_sampled: 61000
    num_agent_steps_trained: 61000
    num_steps_sampled: 61000
    num_steps_trained: 61000
  iterations_since_restore: 61
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,61,788.996,61000,-1.1875,6,-17,946.891


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 62000
  custom_metrics: {}
  date: 2021-09-18_16-20-12
  done: false
  episode_len_mean: 947.7076923076924
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.1692307692307693
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 65
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.094940814707014
          entropy_coeff: 0.009999999999999998
          kl: 0.011099380751596695
          policy_loss: 0.0022414557635784147
          total_loss: -0.013357095668713253
          vf_explained_var: -0.5701809525489807
          vf_loss: 0.001604816184974172
    num_agent_steps_sampled: 62000
    num_agent_steps_trained: 62000
    num_steps_sampled: 62000
    num_steps_trained: 62000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,62,799.178,62000,-1.16923,6,-17,947.708


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 63000
  custom_metrics: {}
  date: 2021-09-18_16-20-22
  done: false
  episode_len_mean: 948.5
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.1515151515151516
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 66
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1690964804755315
          entropy_coeff: 0.009999999999999998
          kl: 0.016103836434318158
          policy_loss: -0.009063829233249028
          total_loss: -0.023897259351280002
          vf_explained_var: -0.597825825214386
          vf_loss: 0.0014224914251826704
    num_agent_steps_sampled: 63000
    num_agent_steps_trained: 63000
    num_steps_sampled: 63000
    num_steps_trained: 63000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,63,808.781,63000,-1.15152,6,-17,948.5


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 64000
  custom_metrics: {}
  date: 2021-09-18_16-20-32
  done: false
  episode_len_mean: 949.2686567164179
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.1343283582089552
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 67
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2080962631437515
          entropy_coeff: 0.009999999999999998
          kl: 0.010283481526969594
          policy_loss: 0.0011737524635261959
          total_loss: -0.01619636867609289
          vf_explained_var: -0.5366697311401367
          vf_loss: 0.0012401644541468056
    num_agent_steps_sampled: 64000
    num_agent_steps_trained: 64000
    num_steps_sampled: 64000
    num_steps_trained: 64000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,64,818.469,64000,-1.13433,6,-17,949.269


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 65000
  custom_metrics: {}
  date: 2021-09-18_16-20-41
  done: false
  episode_len_mean: 950.0147058823529
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.1176470588235294
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 68
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3147110091315377
          entropy_coeff: 0.009999999999999998
          kl: 0.012112479123064753
          policy_loss: -0.0928427712370952
          total_loss: -0.10929345625142256
          vf_explained_var: -0.2870159447193146
          vf_loss: 0.002608460860533847
    num_agent_steps_sampled: 65000
    num_agent_steps_trained: 65000
    num_steps_sampled: 65000
    num_steps_trained: 65000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,65,827.942,65000,-1.11765,6,-17,950.015


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 66000
  custom_metrics: {}
  date: 2021-09-18_16-20-50
  done: false
  episode_len_mean: 950.7391304347826
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.1014492753623188
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 69
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.4446755382749767
          entropy_coeff: 0.009999999999999998
          kl: 0.0059911760582319696
          policy_loss: -0.0823147031168143
          total_loss: -0.103671019938257
          vf_explained_var: -0.6271396279335022
          vf_loss: 0.001068414035317902
    num_agent_steps_sampled: 66000
    num_agent_steps_trained: 66000
    num_steps_sampled: 66000
    num_steps_trained: 66000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,66,837.045,66000,-1.10145,6,-17,950.739


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 67000
  custom_metrics: {}
  date: 2021-09-18_16-21-00
  done: false
  episode_len_mean: 951.4428571428572
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.0857142857142856
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 70
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1844130555788674
          entropy_coeff: 0.009999999999999998
          kl: 0.014051260943567416
          policy_loss: -0.04286992487808069
          total_loss: -0.05269750650558207
          vf_explained_var: 0.39199379086494446
          vf_loss: 0.007274249558233552
    num_agent_steps_sampled: 67000
    num_agent_steps_trained: 67000
    num_steps_sampled: 67000
    num_steps_trained: 67000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,67,846.414,67000,-1.08571,6,-17,951.443


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 68000
  custom_metrics: {}
  date: 2021-09-18_16-21-09
  done: false
  episode_len_mean: 952.1267605633802
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.0704225352112675
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 71
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.23444803820716
          entropy_coeff: 0.009999999999999998
          kl: 0.015374650321640064
          policy_loss: -0.07178106498387125
          total_loss: -0.08156095900469357
          vf_explained_var: 0.4203774631023407
          vf_loss: 0.007375640525586076
    num_agent_steps_sampled: 68000
    num_agent_steps_trained: 68000
    num_steps_sampled: 68000
    num_steps_trained: 68000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,68,855.878,68000,-1.07042,6,-17,952.127


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 69000
  custom_metrics: {}
  date: 2021-09-18_16-21-19
  done: false
  episode_len_mean: 952.7916666666666
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.125
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 72
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3034107552634344
          entropy_coeff: 0.009999999999999998
          kl: 0.01423460977874657
          policy_loss: 0.019342604527870812
          total_loss: 0.0714603620270888
          vf_explained_var: 0.4094415009021759
          vf_loss: 0.07034768282901496
    num_agent_steps_sampled: 69000
    num_agent_steps_trained: 69000
    num_steps_sampled: 69000
    num_steps_trained: 69000
  iterations_since_restore: 69
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,69,865.55,69000,-1.125,6,-17,952.792


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 70000
  custom_metrics: {}
  date: 2021-09-18_16-21-29
  done: false
  episode_len_mean: 953.4383561643835
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.1095890410958904
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 73
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3209650225109524
          entropy_coeff: 0.009999999999999998
          kl: 0.011760266609349321
          policy_loss: -0.07847798212120931
          total_loss: 0.011269927035189337
          vf_explained_var: 0.029305381700396538
          vf_loss: 0.10898846906299392
    num_agent_steps_sampled: 70000
    num_agent_steps_trained: 70000
    num_steps_sampled: 70000
    num_steps_trained: 70000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,70,875.464,70000,-1.10959,6,-17,953.438


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 71000
  custom_metrics: {}
  date: 2021-09-18_16-21-38
  done: false
  episode_len_mean: 954.0675675675676
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.1891891891891893
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 74
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1983835948838126
          entropy_coeff: 0.009999999999999998
          kl: 0.015071017053820175
          policy_loss: 0.04954090333647198
          total_loss: 0.11397781636979845
          vf_explained_var: 0.11887650191783905
          vf_loss: 0.08133427773912748
    num_agent_steps_sampled: 71000
    num_agent_steps_trained: 71000
    num_steps_sampled: 71000
    num_steps_trained: 71000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,71,884.996,71000,-1.18919,6,-17,954.068


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 72000
  custom_metrics: {}
  date: 2021-09-18_16-21-48
  done: false
  episode_len_mean: 954.68
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.1733333333333333
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 75
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.361377713415358
          entropy_coeff: 0.009999999999999998
          kl: 0.011009007771668916
          policy_loss: -0.0277599251932568
          total_loss: -0.04506506994366646
          vf_explained_var: -0.508177638053894
          vf_loss: 0.0025930927639516693
    num_agent_steps_sampled: 72000
    num_agent_steps_trained: 72000
    num_steps_sampled: 72000
    num_steps_trained: 72000
  iterations_since_restore: 72


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,72,894.679,72000,-1.17333,6,-17,954.68


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 73000
  custom_metrics: {}
  date: 2021-09-18_16-21-58
  done: false
  episode_len_mean: 955.2763157894736
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.1578947368421053
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 76
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.362012667126126
          entropy_coeff: 0.009999999999999998
          kl: 0.011293318398381104
          policy_loss: -0.042689505674772796
          total_loss: -0.058618664368987085
          vf_explained_var: -0.5122891068458557
          vf_loss: 0.0038794744992628693
    num_agent_steps_sampled: 73000
    num_agent_steps_trained: 73000
    num_steps_sampled: 73000
    num_steps_trained: 73000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,73,904.405,73000,-1.15789,6,-17,955.276


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 74000
  custom_metrics: {}
  date: 2021-09-18_16-22-07
  done: false
  episode_len_mean: 955.8571428571429
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.1428571428571428
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 77
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3327783425649007
          entropy_coeff: 0.009999999999999998
          kl: 0.011384261376873579
          policy_loss: 0.09387120430668196
          total_loss: 0.07739107641908857
          vf_explained_var: 0.45245233178138733
          vf_loss: 0.0030054676301208222
    num_agent_steps_sampled: 74000
    num_agent_steps_trained: 74000
    num_steps_sampled: 74000
    num_steps_trained: 74000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,74,913.738,74000,-1.14286,6,-17,955.857


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 75000
  custom_metrics: {}
  date: 2021-09-18_16-22-17
  done: false
  episode_len_mean: 956.4230769230769
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.1153846153846154
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 78
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2808373477723864
          entropy_coeff: 0.009999999999999998
          kl: 0.0107847347101261
          policy_loss: -0.041977416599790256
          total_loss: 0.06924806390371588
          vf_explained_var: 0.25379952788352966
          vf_loss: 0.13039400776227314
    num_agent_steps_sampled: 75000
    num_agent_steps_trained: 75000
    num_steps_sampled: 75000
    num_steps_trained: 75000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,75,923.272,75000,-1.11538,6,-17,956.423


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 76000
  custom_metrics: {}
  date: 2021-09-18_16-22-27
  done: false
  episode_len_mean: 956.9746835443038
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.1012658227848102
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 79
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2726280477311875
          entropy_coeff: 0.009999999999999998
          kl: 0.010747958550140326
          policy_loss: -0.0753200762387779
          total_loss: -0.0881874591526058
          vf_explained_var: 0.08895084261894226
          vf_loss: 0.006231464890556203
    num_agent_steps_sampled: 76000
    num_agent_steps_trained: 76000
    num_steps_sampled: 76000
    num_steps_trained: 76000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,76,933.133,76000,-1.10127,6,-17,956.975


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 77000
  custom_metrics: {}
  date: 2021-09-18_16-22-36
  done: false
  episode_len_mean: 957.5125
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.0875
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 80
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.333445986111959
          entropy_coeff: 0.009999999999999998
          kl: 0.011109602754442996
          policy_loss: -0.11983919375472599
          total_loss: -0.1308373322089513
          vf_explained_var: 0.7625449895858765
          vf_loss: 0.00858683723749386
    num_agent_steps_sampled: 77000
    num_agent_steps_trained: 77000
    num_steps_sampled: 77000
    num_steps_trained: 77000
  iterations_since_restore: 77
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,77,942.601,77000,-1.0875,6,-17,957.513




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 78000
  custom_metrics: {}
  date: 2021-09-18_16-23-02
  done: false
  episode_len_mean: 956.1604938271605
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.0740740740740742
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 81
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.4134248733520507
          entropy_coeff: 0.009999999999999998
          kl: 0.012050170816677329
          policy_loss: -0.09341504077116648
          total_loss: -0.10884499384297265
          vf_explained_var: 0.19497640430927277
          vf_loss: 0.0046373643925310005
    num_agent_steps_sampled: 78000
    num_agent_steps_trained: 78000
    num_steps_sampled: 78000
    num_steps_trained: 78000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,78,968.746,78000,-1.07407,6,-17,956.16


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 79000
  custom_metrics: {}
  date: 2021-09-18_16-23-14
  done: false
  episode_len_mean: 956.6951219512196
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.0365853658536586
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 82
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.388527594672309
          entropy_coeff: 0.009999999999999998
          kl: 0.00911301744513991
          policy_loss: -0.13510787958900133
          total_loss: -0.1367648740609487
          vf_explained_var: 0.21167674660682678
          vf_loss: 0.019152638847137696
    num_agent_steps_sampled: 79000
    num_agent_steps_trained: 79000
    num_steps_sampled: 79000
    num_steps_trained: 79000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,79,980.012,79000,-1.03659,6,-17,956.695


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 80000
  custom_metrics: {}
  date: 2021-09-18_16-23-23
  done: false
  episode_len_mean: 957.2168674698795
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.0240963855421688
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 83
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2257036156124537
          entropy_coeff: 0.009999999999999998
          kl: 0.010635602629127543
          policy_loss: -0.13361983601417807
          total_loss: -0.147148962587946
          vf_explained_var: 0.3560786843299866
          vf_loss: 0.005138394609093666
    num_agent_steps_sampled: 80000
    num_agent_steps_trained: 80000
    num_steps_sampled: 80000
    num_steps_trained: 80000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,80,989.491,80000,-1.0241,6,-17,957.217


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 81000
  custom_metrics: {}
  date: 2021-09-18_16-23-32
  done: false
  episode_len_mean: 957.7261904761905
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -1.0119047619047619
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 84
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2286994059880576
          entropy_coeff: 0.009999999999999998
          kl: 0.008141746548463284
          policy_loss: -0.014605503115389083
          total_loss: -0.027591187175777222
          vf_explained_var: 0.7292709350585938
          vf_loss: 0.006553474786536147
    num_agent_steps_sampled: 81000
    num_agent_steps_trained: 81000
    num_steps_sampled: 81000
    num_steps_trained: 81000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,81,998.785,81000,-1.0119,6,-17,957.726


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 82000
  custom_metrics: {}
  date: 2021-09-18_16-23-42
  done: false
  episode_len_mean: 958.2235294117647
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.9882352941176471
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 85
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2796242674191793
          entropy_coeff: 0.009999999999999998
          kl: 0.009669047117096695
          policy_loss: -0.026547039051850636
          total_loss: 0.03651258908212185
          vf_explained_var: 0.30491766333580017
          vf_loss: 0.08259256698139426
    num_agent_steps_sampled: 82000
    num_agent_steps_trained: 82000
    num_steps_sampled: 82000
    num_steps_trained: 82000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,82,1008.44,82000,-0.988235,6,-17,958.224


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 83000
  custom_metrics: {}
  date: 2021-09-18_16-23-52
  done: false
  episode_len_mean: 958.7093023255813
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.9767441860465116
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 86
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2678010092841254
          entropy_coeff: 0.009999999999999998
          kl: 0.01660516961763996
          policy_loss: -0.03195831129948298
          total_loss: -0.03778276294469833
          vf_explained_var: 0.09394180774688721
          vf_loss: 0.011249315789124617
    num_agent_steps_sampled: 83000
    num_agent_steps_trained: 83000
    num_steps_sampled: 83000
    num_steps_trained: 83000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,83,1018.4,83000,-0.976744,6,-17,958.709




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 84000
  custom_metrics: {}
  date: 2021-09-18_16-24-41
  done: false
  episode_len_mean: 951.9090909090909
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.8636363636363636
  episode_reward_min: -17.0
  episodes_this_iter: 2
  episodes_total: 88
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2274052672915987
          entropy_coeff: 0.009999999999999998
          kl: 0.013635757058759938
          policy_loss: -0.09400106064147419
          total_loss: -0.016075204147232902
          vf_explained_var: 0.6156895756721497
          vf_loss: 0.095597842703056
    num_agent_steps_sampled: 84000
    num_agent_steps_trained: 84000
    num_steps_sampled: 84000
    num_steps_trained: 84000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,84,1067.54,84000,-0.863636,6,-17,951.909


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 85000
  custom_metrics: {}
  date: 2021-09-18_16-24-52
  done: false
  episode_len_mean: 952.4494382022472
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.8314606741573034
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 89
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.3523484177059597
          entropy_coeff: 0.009999999999999998
          kl: 0.015746227895127272
          policy_loss: -0.016598949250247744
          total_loss: 0.07769457673033078
          vf_explained_var: 0.4678023159503937
          vf_loss: 0.11250265799462796
    num_agent_steps_sampled: 85000
    num_agent_steps_trained: 85000
    num_steps_sampled: 85000
    num_steps_trained: 85000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,85,1078.71,85000,-0.831461,6,-17,952.449


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 86000
  custom_metrics: {}
  date: 2021-09-18_16-25-02
  done: false
  episode_len_mean: 952.9777777777778
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.8222222222222222
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 90
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.348871694670783
          entropy_coeff: 0.009999999999999998
          kl: 0.010833983489930078
          policy_loss: 0.13747789308221803
          total_loss: 0.13047069729202324
          vf_explained_var: 0.26592713594436646
          vf_loss: 0.012825053597852173
    num_agent_steps_sampled: 86000
    num_agent_steps_trained: 86000
    num_steps_sampled: 86000
    num_steps_trained: 86000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,86,1088.31,86000,-0.822222,6,-17,952.978


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 87000
  custom_metrics: {}
  date: 2021-09-18_16-25-11
  done: false
  episode_len_mean: 953.4945054945055
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.8131868131868132
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 91
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2103344413969253
          entropy_coeff: 0.009999999999999998
          kl: 0.009769614142024154
          policy_loss: -0.10783260828918881
          total_loss: -0.12060633384519154
          vf_explained_var: 0.7229940891265869
          vf_loss: 0.006032377740161287
    num_agent_steps_sampled: 87000
    num_agent_steps_trained: 87000
    num_steps_sampled: 87000
    num_steps_trained: 87000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,87,1097.61,87000,-0.813187,6,-17,953.495


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 88000
  custom_metrics: {}
  date: 2021-09-18_16-25-21
  done: false
  episode_len_mean: 954.0
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.8043478260869565
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 92
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.4004220883051555
          entropy_coeff: 0.009999999999999998
          kl: 0.009986087939026254
          policy_loss: -0.1284053170018726
          total_loss: -0.14715883450375664
          vf_explained_var: 0.3931565284729004
          vf_loss: 0.00188040058581262
    num_agent_steps_sampled: 88000
    num_agent_steps_trained: 88000
    num_steps_sampled: 88000
    num_steps_trained: 88000
  iterations_since_restore: 88
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,88,1106.93,88000,-0.804348,6,-17,954


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 89000
  custom_metrics: {}
  date: 2021-09-18_16-25-31
  done: false
  episode_len_mean: 954.494623655914
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.7741935483870968
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 93
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.2396639611985947
          entropy_coeff: 0.009999999999999998
          kl: 0.009425203336745921
          policy_loss: -0.04106121522684892
          total_loss: -0.037973231242762674
          vf_explained_var: 0.7053161859512329
          vf_loss: 0.022303617554199365
    num_agent_steps_sampled: 89000
    num_agent_steps_trained: 89000
    num_steps_sampled: 89000
    num_steps_trained: 89000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,89,1117.11,89000,-0.774194,6,-17,954.495


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 90000
  custom_metrics: {}
  date: 2021-09-18_16-25-41
  done: false
  episode_len_mean: 954.9787234042553
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.776595744680851
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 94
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7939368844032288
          entropy_coeff: 0.009999999999999998
          kl: 0.010585254786628164
          policy_loss: 0.022923836650119888
          total_loss: 0.02898104041814804
          vf_explained_var: -0.1924740970134735
          vf_loss: 0.020424042103695684
    num_agent_steps_sampled: 90000
    num_agent_steps_trained: 90000
    num_steps_sampled: 90000
    num_steps_trained: 90000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,90,1127.56,90000,-0.776596,6,-17,954.979




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 91000
  custom_metrics: {}
  date: 2021-09-18_16-26-20
  done: false
  episode_len_mean: 951.7894736842105
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.7263157894736842
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 95
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.6599336134062872
          entropy_coeff: 0.009999999999999998
          kl: 0.0093572844748536
          policy_loss: -0.08784801031773289
          total_loss: 0.0333473046310246
          vf_explained_var: 0.5752996206283569
          vf_loss: 0.13463656764943152
    num_agent_steps_sampled: 91000
    num_agent_steps_trained: 91000
    num_steps_sampled: 91000
    num_steps_trained: 91000
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,91,1166.65,91000,-0.726316,6,-17,951.789




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 92000
  custom_metrics: {}
  date: 2021-09-18_16-27-00
  done: false
  episode_len_mean: 946.7216494845361
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.6288659793814433
  episode_reward_min: -17.0
  episodes_this_iter: 2
  episodes_total: 97
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.8749012072881064
          entropy_coeff: 0.009999999999999998
          kl: 0.008865397957402897
          policy_loss: 0.09542159338792165
          total_loss: 0.19765625629160138
          vf_explained_var: 0.46233507990837097
          vf_loss: 0.1179916069118513
    num_agent_steps_sampled: 92000
    num_agent_steps_trained: 92000
    num_steps_sampled: 92000
    num_steps_trained: 92000
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,92,1206.54,92000,-0.628866,6,-17,946.722


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 93000
  custom_metrics: {}
  date: 2021-09-18_16-27-12
  done: false
  episode_len_mean: 947.265306122449
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.7551020408163265
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 98
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3374999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.0256695482465954
          entropy_coeff: 0.009999999999999998
          kl: 0.03274304560879995
          policy_loss: 0.008881798221005334
          total_loss: 0.4718125851617919
          vf_explained_var: 0.23494894802570343
          vf_loss: 0.47213670218156445
    num_agent_steps_sampled: 93000
    num_agent_steps_trained: 93000
    num_steps_sampled: 93000
    num_steps_trained: 93000
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,93,1217.86,93000,-0.755102,6,-17,947.265


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 94000
  custom_metrics: {}
  date: 2021-09-18_16-27-21
  done: false
  episode_len_mean: 947.7979797979798
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.7676767676767676
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 99
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.2649994532267255
          entropy_coeff: 0.009999999999999998
          kl: 0.010650995746810644
          policy_loss: -0.03370949650804202
          total_loss: 0.17711093458864424
          vf_explained_var: 0.3298164904117584
          vf_loss: 0.2280783564162751
    num_agent_steps_sampled: 94000
    num_agent_steps_trained: 94000
    num_steps_sampled: 94000
    num_steps_trained: 94000
  iterations_since_restore: 94
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,94,1227.36,94000,-0.767677,6,-17,947.798


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 95000
  custom_metrics: {}
  date: 2021-09-18_16-27-31
  done: false
  episode_len_mean: 948.32
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.76
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 100
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.618031911055247
          entropy_coeff: 0.009999999999999998
          kl: 0.006809661680779034
          policy_loss: 0.1356133793377214
          total_loss: 0.13817643394900692
          vf_explained_var: 0.6510093808174133
          vf_loss: 0.01529598235178532
    num_agent_steps_sampled: 95000
    num_agent_steps_trained: 95000
    num_steps_sampled: 95000
    num_steps_trained: 95000
  iterations_since_restore: 95
  node_ip: 192.168.3.5
  num

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,95,1236.84,95000,-0.76,6,-17,948.32


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 96000
  custom_metrics: {}
  date: 2021-09-18_16-27-40
  done: false
  episode_len_mean: 948.32
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.64
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 101
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.854869700802697
          entropy_coeff: 0.009999999999999998
          kl: 0.005535760776732799
          policy_loss: -0.11080258120265273
          total_loss: -0.12464827253586716
          vf_explained_var: 0.1853426843881607
          vf_loss: 0.0019005242521719387
    num_agent_steps_sampled: 96000
    num_agent_steps_trained: 96000
    num_steps_sampled: 96000
    num_steps_trained: 96000
  iterations_since_restore: 96
  node_ip: 192.168.3.5


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,96,1246.3,96000,-0.64,6,-17,948.32


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 97000
  custom_metrics: {}
  date: 2021-09-18_16-27-50
  done: false
  episode_len_mean: 948.32
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.55
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 102
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.6262897080845302
          entropy_coeff: 0.009999999999999998
          kl: 0.007376009181398994
          policy_loss: -0.08015532460477617
          total_loss: -0.09139372689856423
          vf_explained_var: -0.09918142855167389
          vf_loss: 0.001290392722391213
    num_agent_steps_sampled: 97000
    num_agent_steps_trained: 97000
    num_steps_sampled: 97000
    num_steps_trained: 97000
  iterations_since_restore: 97
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,97,1255.83,97000,-0.55,6,-17,948.32


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 98000
  custom_metrics: {}
  date: 2021-09-18_16-27-59
  done: false
  episode_len_mean: 948.32
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.47
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 103
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.6481426530414158
          entropy_coeff: 0.009999999999999998
          kl: 0.005920376141791106
          policy_loss: -0.17538051820463604
          total_loss: -0.18753289646572538
          vf_explained_var: 0.3180719017982483
          vf_loss: 0.0013318552939583445
    num_agent_steps_sampled: 98000
    num_agent_steps_trained: 98000
    num_steps_sampled: 98000
    num_steps_trained: 98000
  iterations_since_restore: 98
  node_ip: 192.168.3.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,98,1265.26,98000,-0.47,6,-17,948.32




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 99000
  custom_metrics: {}
  date: 2021-09-18_16-29-13
  done: false
  episode_len_mean: 944.85
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.3
  episode_reward_min: -17.0
  episodes_this_iter: 2
  episodes_total: 105
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.1018926355573866
          entropy_coeff: 0.009999999999999998
          kl: 0.012215229894619463
          policy_loss: -0.07517559429009756
          total_loss: 0.1618111740383837
          vf_explained_var: 0.42503446340560913
          vf_loss: 0.2518217334316836
    num_agent_steps_sampled: 99000
    num_agent_steps_trained: 99000
    num_steps_sampled: 99000
    num_steps_trained: 99000
  iterations_since_restore: 99
  node_ip: 192.168.3.5
  nu

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,99,1338.84,99000,-0.3,6,-17,944.85


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 100000
  custom_metrics: {}
  date: 2021-09-18_16-29-25
  done: false
  episode_len_mean: 944.85
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.21
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 106
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.181885247760349
          entropy_coeff: 0.009999999999999998
          kl: 0.011698166113473105
          policy_loss: 0.026909055622915425
          total_loss: 0.2645635020194782
          vf_explained_var: 0.4689904451370239
          vf_loss: 0.2535511041680972
    num_agent_steps_sampled: 100000
    num_agent_steps_trained: 100000
    num_steps_sampled: 100000
    num_steps_trained: 100000
  iterations_since_restore: 100
  node_ip: 192.168.3.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,100,1350.75,100000,-0.21,6,-17,944.85


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 101000
  custom_metrics: {}
  date: 2021-09-18_16-29-35
  done: false
  episode_len_mean: 944.85
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: -0.17
  episode_reward_min: -17.0
  episodes_this_iter: 1
  episodes_total: 107
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.9883912391132779
          entropy_coeff: 0.009999999999999998
          kl: 0.007825574744892963
          policy_loss: -0.017567840963602067
          total_loss: 0.10974200338953072
          vf_explained_var: 0.0886550098657608
          vf_loss: 0.14323206121722856
    num_agent_steps_sampled: 101000
    num_agent_steps_trained: 101000
    num_steps_sampled: 101000
    num_steps_trained: 101000
  iterations_since_restore: 101
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,101,1360.74,101000,-0.17,6,-17,944.85




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 102000
  custom_metrics: {}
  date: 2021-09-18_16-30-44
  done: false
  episode_len_mean: 945.81
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.04
  episode_reward_min: -13.0
  episodes_this_iter: 2
  episodes_total: 109
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.0217105322413973
          entropy_coeff: 0.009999999999999998
          kl: 0.009397688952599025
          policy_loss: 0.0017522010538313123
          total_loss: 0.0686892402668794
          vf_explained_var: 0.03761327639222145
          vf_loss: 0.08239656447743376
    num_agent_steps_sampled: 102000
    num_agent_steps_trained: 102000
    num_steps_sampled: 102000
    num_steps_trained: 102000
  iterations_since_restore: 102
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,102,1430.07,102000,0.04,6,-13,945.81




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 103000
  custom_metrics: {}
  date: 2021-09-18_16-31-17
  done: false
  episode_len_mean: 941.34
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.16
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 110
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.9323501414722866
          entropy_coeff: 0.009999999999999998
          kl: 0.00710800979137842
          policy_loss: -0.16508741438802746
          total_loss: -0.0802989593707025
          vf_explained_var: 0.09184782952070236
          vf_loss: 0.10051352497086757
    num_agent_steps_sampled: 103000
    num_agent_steps_trained: 103000
    num_steps_sampled: 103000
    num_steps_trained: 103000
  iterations_since_restore: 103
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,103,1463.02,103000,0.16,6,-13,941.34


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 104000
  custom_metrics: {}
  date: 2021-09-18_16-31-29
  done: false
  episode_len_mean: 941.34
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.17
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 111
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.509049317571852
          entropy_coeff: 0.009999999999999998
          kl: 0.019906055578333356
          policy_loss: 0.04922690904802746
          total_loss: 0.10282159397999445
          vf_explained_var: 0.10371164977550507
          vf_loss: 0.058607738483179773
    num_agent_steps_sampled: 104000
    num_agent_steps_trained: 104000
    num_steps_sampled: 104000
    num_steps_trained: 104000
  iterations_since_restore: 104
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,104,1474.89,104000,0.17,6,-13,941.34




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 105000
  custom_metrics: {}
  date: 2021-09-18_16-32-17
  done: false
  episode_len_mean: 940.72
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.26
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 112
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.005447930759854
          entropy_coeff: 0.009999999999999998
          kl: 0.007886442824742402
          policy_loss: -0.028747622999880047
          total_loss: 0.08693937808275223
          vf_explained_var: 0.18945012986660004
          vf_loss: 0.13174896927602175
    num_agent_steps_sampled: 105000
    num_agent_steps_trained: 105000
    num_steps_sampled: 105000
    num_steps_trained: 105000
  iterations_since_restore: 105
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,105,1522.76,105000,0.26,6,-13,940.72




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 106000
  custom_metrics: {}
  date: 2021-09-18_16-33-03
  done: false
  episode_len_mean: 946.6
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.3
  episode_reward_min: -13.0
  episodes_this_iter: 2
  episodes_total: 114
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.9812973923153347
          entropy_coeff: 0.009999999999999998
          kl: 0.009240582904874723
          policy_loss: 0.027288515037960478
          total_loss: 0.0769810708032714
          vf_explained_var: -0.06651411205530167
          vf_loss: 0.06482748487550351
    num_agent_steps_sampled: 106000
    num_agent_steps_trained: 106000
    num_steps_sampled: 106000
    num_steps_trained: 106000
  iterations_since_restore: 106
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,106,1569.26,106000,0.3,6,-13,946.6


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 107000
  custom_metrics: {}
  date: 2021-09-18_16-33-15
  done: false
  episode_len_mean: 946.6
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.28
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 115
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8289337635040284
          entropy_coeff: 0.009999999999999998
          kl: 0.010282987925444149
          policy_loss: -0.0066660661664274005
          total_loss: -0.01651849862602022
          vf_explained_var: -0.5083476901054382
          vf_loss: 0.00323114072283109
    num_agent_steps_sampled: 107000
    num_agent_steps_trained: 107000
    num_steps_sampled: 107000
    num_steps_trained: 107000
  iterations_since_restore: 107
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,107,1580.8,107000,0.28,6,-13,946.6


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 108000
  custom_metrics: {}
  date: 2021-09-18_16-33-25
  done: false
  episode_len_mean: 946.6
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.35
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 116
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.997600213686625
          entropy_coeff: 0.009999999999999998
          kl: 0.005063767123713071
          policy_loss: -0.18088943080769646
          total_loss: -0.1530528362426493
          vf_explained_var: 0.038420699536800385
          vf_loss: 0.04524906406230811
    num_agent_steps_sampled: 108000
    num_agent_steps_trained: 108000
    num_steps_sampled: 108000
    num_steps_trained: 108000
  iterations_since_restore: 108
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,108,1591.2,108000,0.35,6,-13,946.6


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 109000
  custom_metrics: {}
  date: 2021-09-18_16-33-36
  done: false
  episode_len_mean: 946.6
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.35
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 117
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.106818121009403
          entropy_coeff: 0.009999999999999998
          kl: 0.018194247254637894
          policy_loss: 0.019617316292391884
          total_loss: 0.01360184070136812
          vf_explained_var: 0.717519998550415
          vf_loss: 0.005841862958105695
    num_agent_steps_sampled: 109000
    num_agent_steps_trained: 109000
    num_steps_sampled: 109000
    num_steps_trained: 109000
  iterations_since_restore: 109
  node_ip: 192.168.3.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,109,1601.77,109000,0.35,6,-13,946.6


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 110000
  custom_metrics: {}
  date: 2021-09-18_16-33-46
  done: false
  episode_len_mean: 946.6
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.34
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 118
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.0948668241500856
          entropy_coeff: 0.009999999999999998
          kl: 0.01350827739090449
          policy_loss: -0.15086931867731943
          total_loss: -0.14410900043116676
          vf_explained_var: 0.5744604468345642
          vf_loss: 0.02087041762812684
    num_agent_steps_sampled: 110000
    num_agent_steps_trained: 110000
    num_steps_sampled: 110000
    num_steps_trained: 110000
  iterations_since_restore: 110
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,110,1611.41,110000,0.34,6,-13,946.6


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 111000
  custom_metrics: {}
  date: 2021-09-18_16-33-56
  done: false
  episode_len_mean: 946.6
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.3
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 119
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.9383375936084324
          entropy_coeff: 0.009999999999999998
          kl: 0.012653227449403762
          policy_loss: -0.1262852641960813
          total_loss: -0.04016966164732973
          vf_explained_var: 0.6163173317909241
          vf_loss: 0.09909328073263168
    num_agent_steps_sampled: 111000
    num_agent_steps_trained: 111000
    num_steps_sampled: 111000
    num_steps_trained: 111000
  iterations_since_restore: 111
  node_ip: 192.168.3.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,111,1621.47,111000,0.3,6,-13,946.6


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 112000
  custom_metrics: {}
  date: 2021-09-18_16-34-06
  done: false
  episode_len_mean: 946.6
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.32
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 120
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.9026051839192708
          entropy_coeff: 0.009999999999999998
          kl: 0.011899059517918826
          policy_loss: -0.11951694471968545
          total_loss: -0.003020386066701677
          vf_explained_var: 0.41021475195884705
          vf_loss: 0.1294987139887073
    num_agent_steps_sampled: 112000
    num_agent_steps_trained: 112000
    num_steps_sampled: 112000
    num_steps_trained: 112000
  iterations_since_restore: 112
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,112,1631.79,112000,0.32,6,-13,946.6


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 113000
  custom_metrics: {}
  date: 2021-09-18_16-34-16
  done: false
  episode_len_mean: 946.6
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.32
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 121
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.0072214669651456
          entropy_coeff: 0.009999999999999998
          kl: 0.00852238077684054
          policy_loss: -0.1716554654141267
          total_loss: -0.1734151591029432
          vf_explained_var: 0.43708187341690063
          vf_loss: 0.013998066362303992
    num_agent_steps_sampled: 113000
    num_agent_steps_trained: 113000
    num_steps_sampled: 113000
    num_steps_trained: 113000
  iterations_since_restore: 113
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,113,1641.63,113000,0.32,6,-13,946.6


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 114000
  custom_metrics: {}
  date: 2021-09-18_16-34-27
  done: false
  episode_len_mean: 946.6
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.32
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 122
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8978182077407837
          entropy_coeff: 0.009999999999999998
          kl: 0.013585599397847462
          policy_loss: 0.006227764238913854
          total_loss: 0.005272349135743247
          vf_explained_var: 0.6134947538375854
          vf_loss: 0.011145058248399033
    num_agent_steps_sampled: 114000
    num_agent_steps_trained: 114000
    num_steps_sampled: 114000
    num_steps_trained: 114000
  iterations_since_restore: 114
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,114,1652.36,114000,0.32,6,-13,946.6


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 115000
  custom_metrics: {}
  date: 2021-09-18_16-34-37
  done: false
  episode_len_mean: 946.6
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.31
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 123
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.9228509426116944
          entropy_coeff: 0.009999999999999998
          kl: 0.010655998274529389
          policy_loss: -0.044775815597838824
          total_loss: 0.04259123139911228
          vf_explained_var: 0.6160501837730408
          vf_loss: 0.1012009564269748
    num_agent_steps_sampled: 115000
    num_agent_steps_trained: 115000
    num_steps_sampled: 115000
    num_steps_trained: 115000
  iterations_since_restore: 115
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,115,1662.68,115000,0.31,6,-13,946.6


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 116000
  custom_metrics: {}
  date: 2021-09-18_16-34-47
  done: false
  episode_len_mean: 946.6
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.31
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 124
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.0224955042203265
          entropy_coeff: 0.009999999999999998
          kl: 0.008992939272482422
          policy_loss: 0.01354437122742335
          total_loss: 0.023661233815881942
          vf_explained_var: 0.5407137274742126
          vf_loss: 0.025789138472949464
    num_agent_steps_sampled: 116000
    num_agent_steps_trained: 116000
    num_steps_sampled: 116000
    num_steps_trained: 116000
  iterations_since_restore: 116
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,116,1672.51,116000,0.31,6,-13,946.6


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 117000
  custom_metrics: {}
  date: 2021-09-18_16-34-57
  done: false
  episode_len_mean: 946.6
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.31
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 125
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8773762543996175
          entropy_coeff: 0.009999999999999998
          kl: 0.008621158410224724
          policy_loss: -0.13999290896786584
          total_loss: -0.12004967398113675
          vf_explained_var: -0.0018536223797127604
          vf_loss: 0.034352538113792734
    num_agent_steps_sampled: 117000
    num_agent_steps_trained: 117000
    num_steps_sampled: 117000
    num_steps_trained: 117000
  iterations_since_restore: 117
  node_ip: 192.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,117,1682.32,117000,0.31,6,-13,946.6


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 118000
  custom_metrics: {}
  date: 2021-09-18_16-35-06
  done: false
  episode_len_mean: 946.6
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.31
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 126
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.7209895451863606
          entropy_coeff: 0.009999999999999998
          kl: 0.009004613437113207
          policy_loss: -0.04432893577549193
          total_loss: -0.029004624237616856
          vf_explained_var: 0.4794319272041321
          vf_loss: 0.027975616702396008
    num_agent_steps_sampled: 118000
    num_agent_steps_trained: 118000
    num_steps_sampled: 118000
    num_steps_trained: 118000
  iterations_since_restore: 118
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,118,1691.7,118000,0.31,6,-13,946.6


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 119000
  custom_metrics: {}
  date: 2021-09-18_16-35-16
  done: false
  episode_len_mean: 946.6
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.31
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 127
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8992585327890183
          entropy_coeff: 0.009999999999999998
          kl: 0.007096368785606641
          policy_loss: -0.14509714874956342
          total_loss: -0.11478069006568856
          vf_explained_var: 0.3857593834400177
          vf_loss: 0.04571651048026979
    num_agent_steps_sampled: 119000
    num_agent_steps_trained: 119000
    num_steps_sampled: 119000
    num_steps_trained: 119000
  iterations_since_restore: 119
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,119,1701.22,119000,0.31,6,-13,946.6


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 120000
  custom_metrics: {}
  date: 2021-09-18_16-35-25
  done: false
  episode_len_mean: 946.6
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.32
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 128
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.1763391243086923
          entropy_coeff: 0.009999999999999998
          kl: 0.010275055309578403
          policy_loss: -0.06383742325835758
          total_loss: -0.06162283271551132
          vf_explained_var: 0.6102808713912964
          vf_loss: 0.018776239992843734
    num_agent_steps_sampled: 120000
    num_agent_steps_trained: 120000
    num_steps_sampled: 120000
    num_steps_trained: 120000
  iterations_since_restore: 120
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,120,1710.55,120000,0.32,6,-13,946.6


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 121000
  custom_metrics: {}
  date: 2021-09-18_16-35-34
  done: false
  episode_len_mean: 946.6
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.33
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 129
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.9388151208559672
          entropy_coeff: 0.009999999999999998
          kl: 0.01329713528469858
          policy_loss: -0.05536364706026183
          total_loss: -0.015633273869752884
          vf_explained_var: 0.6063057780265808
          vf_loss: 0.052386847042685585
    num_agent_steps_sampled: 121000
    num_agent_steps_trained: 121000
    num_steps_sampled: 121000
    num_steps_trained: 121000
  iterations_since_restore: 121
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,121,1719.93,121000,0.33,6,-13,946.6


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 122000
  custom_metrics: {}
  date: 2021-09-18_16-35-44
  done: false
  episode_len_mean: 946.6
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.33
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 130
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.9263737983173794
          entropy_coeff: 0.009999999999999998
          kl: 0.011002212609494914
          policy_loss: -0.03492831736803055
          total_loss: -0.0360133182671335
          vf_explained_var: 0.5430781245231628
          vf_loss: 0.012608863030456835
    num_agent_steps_sampled: 122000
    num_agent_steps_trained: 122000
    num_steps_sampled: 122000
    num_steps_trained: 122000
  iterations_since_restore: 122
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,122,1729.4,122000,0.33,6,-13,946.6


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 123000
  custom_metrics: {}
  date: 2021-09-18_16-35-53
  done: false
  episode_len_mean: 946.6
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.35
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 131
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.6543938345379299
          entropy_coeff: 0.009999999999999998
          kl: 0.008113184794988845
          policy_loss: -0.19065508792797725
          total_loss: -0.17894472579161327
          vf_explained_var: 0.7369613647460938
          vf_loss: 0.024146996267760793
    num_agent_steps_sampled: 123000
    num_agent_steps_trained: 123000
    num_steps_sampled: 123000
    num_steps_trained: 123000
  iterations_since_restore: 123
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,123,1738.55,123000,0.35,6,-13,946.6


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 124000
  custom_metrics: {}
  date: 2021-09-18_16-36-03
  done: false
  episode_len_mean: 946.6
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.36
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 132
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.5859987987412347
          entropy_coeff: 0.009999999999999998
          kl: 0.014328515869793258
          policy_loss: -0.0565927106473181
          total_loss: 0.08529146574437618
          vf_explained_var: 0.4078081250190735
          vf_loss: 0.15049035453961956
    num_agent_steps_sampled: 124000
    num_agent_steps_trained: 124000
    num_steps_sampled: 124000
    num_steps_trained: 124000
  iterations_since_restore: 124
  node_ip: 192.168.3.5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,124,1748.22,124000,0.36,6,-13,946.6




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 125000
  custom_metrics: {}
  date: 2021-09-18_16-36-54
  done: false
  episode_len_mean: 938.47
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.42
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 133
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.641596081521776
          entropy_coeff: 0.009999999999999998
          kl: 0.010936568625881075
          policy_loss: -0.04295312662919362
          total_loss: 0.19173872247338294
          vf_explained_var: 0.1320132464170456
          vf_loss: 0.24557117232018047
    num_agent_steps_sampled: 125000
    num_agent_steps_trained: 125000
    num_steps_sampled: 125000
    num_steps_trained: 125000
  iterations_since_restore: 125
  node_ip: 192.168.3.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,125,1799.12,125000,0.42,6,-13,938.47




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 126000
  custom_metrics: {}
  date: 2021-09-18_16-37-59
  done: false
  episode_len_mean: 924.9
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.54
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 136
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.5817578236262004
          entropy_coeff: 0.009999999999999998
          kl: 0.014568184449450822
          policy_loss: -0.001188394675652186
          total_loss: 0.13637544421686065
          vf_explained_var: 0.5331628322601318
          vf_loss: 0.14600627302295632
    num_agent_steps_sampled: 126000
    num_agent_steps_trained: 126000
    num_steps_sampled: 126000
    num_steps_trained: 126000
  iterations_since_restore: 126
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,126,1863.94,126000,0.54,6,-13,924.9


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 127000
  custom_metrics: {}
  date: 2021-09-18_16-38-10
  done: false
  episode_len_mean: 924.9
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.58
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 137
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.4974589308102926
          entropy_coeff: 0.009999999999999998
          kl: 0.009372407347914901
          policy_loss: -0.04975844385723273
          total_loss: 0.024677059344119495
          vf_explained_var: 0.02506692335009575
          vf_loss: 0.08466531107616093
    num_agent_steps_sampled: 127000
    num_agent_steps_trained: 127000
    num_steps_sampled: 127000
    num_steps_trained: 127000
  iterations_since_restore: 127
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,127,1875.48,127000,0.58,6,-13,924.9


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 128000
  custom_metrics: {}
  date: 2021-09-18_16-38-20
  done: false
  episode_len_mean: 924.9
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.61
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 138
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.8129900733629862
          entropy_coeff: 0.009999999999999998
          kl: 0.013265159937992306
          policy_loss: 0.010647150956922107
          total_loss: 0.13142382721934054
          vf_explained_var: 0.20879007875919342
          vf_loss: 0.13219109118605654
    num_agent_steps_sampled: 128000
    num_agent_steps_trained: 128000
    num_steps_sampled: 128000
    num_steps_trained: 128000
  iterations_since_restore: 128
  node_ip: 192.168.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,128,1885.41,128000,0.61,6,-13,924.9




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 129000
  custom_metrics: {}
  date: 2021-09-18_16-39-07
  done: false
  episode_len_mean: 922.59
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.64
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 139
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 1.2847236818737453
          entropy_coeff: 0.009999999999999998
          kl: 0.024015112341394012
          policy_loss: 0.07942587104108599
          total_loss: 0.2950616168479125
          vf_explained_var: 0.7917361855506897
          vf_loss: 0.2163253312309583
    num_agent_steps_sampled: 129000
    num_agent_steps_trained: 129000
    num_steps_sampled: 129000
    num_steps_trained: 129000
  iterations_since_restore: 129
  node_ip: 192.168.3.5


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,129,1932.69,129000,0.64,6,-13,922.59


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 130000
  custom_metrics: {}
  date: 2021-09-18_16-39-19
  done: false
  episode_len_mean: 922.59
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.63
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 140
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.653649870554606
          entropy_coeff: 0.009999999999999998
          kl: 0.008522189384490541
          policy_loss: 0.017539826780557634
          total_loss: 0.03710671621892187
          vf_explained_var: 0.597208559513092
          vf_loss: 0.029631847908927336
    num_agent_steps_sampled: 130000
    num_agent_steps_trained: 130000
    num_steps_sampled: 130000
    num_steps_trained: 130000
  iterations_since_restore: 130
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,130,1944.45,130000,0.63,6,-13,922.59




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 131000
  custom_metrics: {}
  date: 2021-09-18_16-40-26
  done: false
  episode_len_mean: 911.01
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.72
  episode_reward_min: -13.0
  episodes_this_iter: 2
  episodes_total: 142
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8242120583852133
          entropy_coeff: 0.009999999999999998
          kl: 0.012047659213971966
          policy_loss: -0.03314260809371869
          total_loss: 0.06477348618209362
          vf_explained_var: 0.7036176323890686
          vf_loss: 0.10700952261168924
    num_agent_steps_sampled: 131000
    num_agent_steps_trained: 131000
    num_steps_sampled: 131000
    num_steps_trained: 131000
  iterations_since_restore: 131
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,131,2011.74,131000,0.72,6,-13,911.01




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 132000
  custom_metrics: {}
  date: 2021-09-18_16-41-11
  done: false
  episode_len_mean: 907.25
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.69
  episode_reward_min: -13.0
  episodes_this_iter: 2
  episodes_total: 144
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7046727273199294
          entropy_coeff: 0.009999999999999998
          kl: 0.011783159944370543
          policy_loss: 0.0914755734304587
          total_loss: 0.1948854914969868
          vf_explained_var: -0.1992635577917099
          vf_loss: 0.11150880557898846
    num_agent_steps_sampled: 132000
    num_agent_steps_trained: 132000
    num_steps_sampled: 132000
    num_steps_trained: 132000
  iterations_since_restore: 132
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,132,2056.62,132000,0.69,6,-13,907.25


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 133000
  custom_metrics: {}
  date: 2021-09-18_16-41-23
  done: false
  episode_len_mean: 907.25
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.69
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 145
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 2.104853139983283
          entropy_coeff: 0.009999999999999998
          kl: 0.010195084565808444
          policy_loss: -0.06318613522582583
          total_loss: -0.068148492442237
          vf_explained_var: 0.23834927380084991
          vf_loss: 0.00834428178301702
    num_agent_steps_sampled: 133000
    num_agent_steps_trained: 133000
    num_steps_sampled: 133000
    num_steps_trained: 133000
  iterations_since_restore: 133
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,133,2067.85,133000,0.69,6,-13,907.25


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 134000
  custom_metrics: {}
  date: 2021-09-18_16-41-33
  done: false
  episode_len_mean: 907.25
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.68
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 146
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.9170289582676359
          entropy_coeff: 0.009999999999999998
          kl: 0.008139772007632532
          policy_loss: -0.015164795600705676
          total_loss: 0.004289004703362783
          vf_explained_var: 0.6024371385574341
          vf_loss: 0.03244294571793742
    num_agent_steps_sampled: 134000
    num_agent_steps_trained: 134000
    num_steps_sampled: 134000
    num_steps_trained: 134000
  iterations_since_restore: 134
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,134,2077.8,134000,0.68,6,-13,907.25


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 135000
  custom_metrics: {}
  date: 2021-09-18_16-41-43
  done: false
  episode_len_mean: 907.25
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.67
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 147
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.9393754151132372
          entropy_coeff: 0.009999999999999998
          kl: 0.007340012267430663
          policy_loss: -0.13654586780402395
          total_loss: -0.07257251764337222
          vf_explained_var: 0.7434049844741821
          vf_loss: 0.07779327867190457
    num_agent_steps_sampled: 135000
    num_agent_steps_trained: 135000
    num_steps_sampled: 135000
    num_steps_trained: 135000
  iterations_since_restore: 135
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,135,2088.05,135000,0.67,6,-13,907.25


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 136000
  custom_metrics: {}
  date: 2021-09-18_16-41-53
  done: false
  episode_len_mean: 907.25
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.63
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 148
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8458625819947985
          entropy_coeff: 0.009999999999999998
          kl: 0.016236432263620395
          policy_loss: -0.06700662225484848
          total_loss: 0.24612035769969226
          vf_explained_var: 0.4609915018081665
          vf_loss: 0.3192560656772306
    num_agent_steps_sampled: 136000
    num_agent_steps_trained: 136000
    num_steps_sampled: 136000
    num_steps_trained: 136000
  iterations_since_restore: 136
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,136,2098.37,136000,0.63,6,-13,907.25


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 137000
  custom_metrics: {}
  date: 2021-09-18_16-42-03
  done: false
  episode_len_mean: 907.25
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.6
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 149
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5311259819401635
          entropy_coeff: 0.009999999999999998
          kl: 0.006364067225342791
          policy_loss: 0.06388703098313676
          total_loss: 0.13341363378696972
          vf_explained_var: 0.19139918684959412
          vf_loss: 0.0800051473495033
    num_agent_steps_sampled: 137000
    num_agent_steps_trained: 137000
    num_steps_sampled: 137000
    num_steps_trained: 137000
  iterations_since_restore: 137
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,137,2108.36,137000,0.6,6,-13,907.25


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 138000
  custom_metrics: {}
  date: 2021-09-18_16-42-13
  done: false
  episode_len_mean: 907.25
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.58
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 150
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 2.0159215794669256
          entropy_coeff: 0.009999999999999998
          kl: 0.009394990771441581
          policy_loss: 0.07040197965171602
          total_loss: 0.08695591588815053
          vf_explained_var: 0.12081994116306305
          vf_loss: 0.02957882615737617
    num_agent_steps_sampled: 138000
    num_agent_steps_trained: 138000
    num_steps_sampled: 138000
    num_steps_trained: 138000
  iterations_since_restore: 138
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,138,2117.77,138000,0.58,6,-13,907.25


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 139000
  custom_metrics: {}
  date: 2021-09-18_16-42-23
  done: false
  episode_len_mean: 912.96
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.46
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 151
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8498979912863838
          entropy_coeff: 0.009999999999999998
          kl: 0.01764073583056292
          policy_loss: 0.031304090138938694
          total_loss: 0.27627317214177716
          vf_explained_var: 0.4437655210494995
          vf_loss: 0.25007212791177963
    num_agent_steps_sampled: 139000
    num_agent_steps_trained: 139000
    num_steps_sampled: 139000
    num_steps_trained: 139000
  iterations_since_restore: 139
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,139,2127.99,139000,0.46,6,-13,912.96


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 140000
  custom_metrics: {}
  date: 2021-09-18_16-42-32
  done: false
  episode_len_mean: 912.96
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.46
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 152
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 2.148477296034495
          entropy_coeff: 0.009999999999999998
          kl: 0.008840707787794186
          policy_loss: -0.04570097434851858
          total_loss: -0.05253276833229595
          vf_explained_var: 0.6849467754364014
          vf_loss: 0.007939571970685696
    num_agent_steps_sampled: 140000
    num_agent_steps_trained: 140000
    num_steps_sampled: 140000
    num_steps_trained: 140000
  iterations_since_restore: 140
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,140,2137.56,140000,0.46,6,-13,912.96




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 141000
  custom_metrics: {}
  date: 2021-09-18_16-43-44
  done: false
  episode_len_mean: 903.83
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.54
  episode_reward_min: -13.0
  episodes_this_iter: 2
  episodes_total: 154
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.9560490793652006
          entropy_coeff: 0.009999999999999998
          kl: 0.006405399726378678
          policy_loss: 0.049374497102366556
          total_loss: 0.11586013320419523
          vf_explained_var: -0.08218669891357422
          vf_loss: 0.08118202802207734
    num_agent_steps_sampled: 141000
    num_agent_steps_trained: 141000
    num_steps_sampled: 141000
    num_steps_trained: 141000
  iterations_since_restore: 141
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,141,2208.9,141000,0.54,6,-13,903.83


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 142000
  custom_metrics: {}
  date: 2021-09-18_16-43-56
  done: false
  episode_len_mean: 903.83
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.54
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 155
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 2.1014335605833265
          entropy_coeff: 0.009999999999999998
          kl: 0.0070524621650273015
          policy_loss: -0.13529233146044944
          total_loss: -0.14575523589220313
          vf_explained_var: -0.23663999140262604
          vf_loss: 0.005195966518173615
    num_agent_steps_sampled: 142000
    num_agent_steps_trained: 142000
    num_steps_sampled: 142000
    num_steps_trained: 142000
  iterations_since_restore: 142
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,142,2221.09,142000,0.54,6,-13,903.83




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 143000
  custom_metrics: {}
  date: 2021-09-18_16-44-44
  done: false
  episode_len_mean: 901.81
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.53
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 156
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.9582120378812153
          entropy_coeff: 0.009999999999999998
          kl: 0.006238018935883257
          policy_loss: 0.017758348749743566
          total_loss: 0.170860593020916
          vf_explained_var: -0.32372573018074036
          vf_loss: 0.1679473689524457
    num_agent_steps_sampled: 143000
    num_agent_steps_trained: 143000
    num_steps_sampled: 143000
    num_steps_trained: 143000
  iterations_since_restore: 143
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,143,2269.24,143000,0.53,6,-13,901.81




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 144000
  custom_metrics: {}
  date: 2021-09-18_16-45-20
  done: false
  episode_len_mean: 900.89
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.53
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 157
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 2.0202280865775215
          entropy_coeff: 0.009999999999999998
          kl: 0.00895012431251619
          policy_loss: -0.047660395916965276
          total_loss: 0.03329586833715439
          vf_explained_var: 0.44870513677597046
          vf_loss: 0.09436204409123294
    num_agent_steps_sampled: 144000
    num_agent_steps_trained: 144000
    num_steps_sampled: 144000
    num_steps_trained: 144000
  iterations_since_restore: 144
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,144,2305.46,144000,0.53,6,-13,900.89


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 145000
  custom_metrics: {}
  date: 2021-09-18_16-45-32
  done: false
  episode_len_mean: 900.89
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.53
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 158
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 2.064853477478027
          entropy_coeff: 0.009999999999999998
          kl: 0.008623892380053672
          policy_loss: -0.09905536874300903
          total_loss: -0.10917085044913821
          vf_explained_var: -0.2424619048833847
          vf_loss: 0.003984285250771791
    num_agent_steps_sampled: 145000
    num_agent_steps_trained: 145000
    num_steps_sampled: 145000
    num_steps_trained: 145000
  iterations_since_restore: 145
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,145,2317.22,145000,0.53,6,-13,900.89




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 146000
  custom_metrics: {}
  date: 2021-09-18_16-46-16
  done: false
  episode_len_mean: 895.07
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.59
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 159
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 2.159372443623013
          entropy_coeff: 0.009999999999999998
          kl: 0.010492082123510767
          policy_loss: -0.1093989630540212
          total_loss: -0.05748350839647982
          vf_explained_var: 0.45623689889907837
          vf_loss: 0.06554175354928399
    num_agent_steps_sampled: 146000
    num_agent_steps_trained: 146000
    num_steps_sampled: 146000
    num_steps_trained: 146000
  iterations_since_restore: 146
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,146,2361.12,146000,0.59,6,-13,895.07




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 147000
  custom_metrics: {}
  date: 2021-09-18_16-46-56
  done: false
  episode_len_mean: 888.06
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.65
  episode_reward_min: -13.0
  episodes_this_iter: 2
  episodes_total: 161
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 2.070094048976898
          entropy_coeff: 0.009999999999999998
          kl: 0.005859959351575405
          policy_loss: -0.2660208213660452
          total_loss: -0.15454463155733214
          vf_explained_var: 0.7149320244789124
          vf_loss: 0.1277272221373601
    num_agent_steps_sampled: 147000
    num_agent_steps_trained: 147000
    num_steps_sampled: 147000
    num_steps_trained: 147000
  iterations_since_restore: 147
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,147,2401.26,147000,0.65,6,-13,888.06


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 148000
  custom_metrics: {}
  date: 2021-09-18_16-47-08
  done: false
  episode_len_mean: 888.06
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.62
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 162
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.793657214111752
          entropy_coeff: 0.009999999999999998
          kl: 0.022114263113768354
          policy_loss: 0.032344404525227015
          total_loss: 0.13784670424130227
          vf_explained_var: 0.7331334352493286
          vf_loss: 0.10664585631909884
    num_agent_steps_sampled: 148000
    num_agent_steps_trained: 148000
    num_steps_sampled: 148000
    num_steps_trained: 148000
  iterations_since_restore: 148
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,148,2412.75,148000,0.62,6,-13,888.06


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 149000
  custom_metrics: {}
  date: 2021-09-18_16-47-18
  done: false
  episode_len_mean: 888.06
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.62
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 163
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8842369940545824
          entropy_coeff: 0.009999999999999998
          kl: 0.006099822591892007
          policy_loss: -0.0847491910888089
          total_loss: -0.016011113342311648
          vf_explained_var: 0.22199414670467377
          vf_loss: 0.08063236840276254
    num_agent_steps_sampled: 149000
    num_agent_steps_trained: 149000
    num_steps_sampled: 149000
    num_steps_trained: 149000
  iterations_since_restore: 149
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,149,2422.97,149000,0.62,6,-13,888.06


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 150000
  custom_metrics: {}
  date: 2021-09-18_16-47-28
  done: false
  episode_len_mean: 888.06
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.56
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 164
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7990270177523295
          entropy_coeff: 0.009999999999999998
          kl: 0.0075250516579167485
          policy_loss: -0.1237764057599836
          total_loss: 0.006170694147133165
          vf_explained_var: 0.8076222538948059
          vf_loss: 0.13936586258932948
    num_agent_steps_sampled: 150000
    num_agent_steps_trained: 150000
    num_steps_sampled: 150000
    num_steps_trained: 150000
  iterations_since_restore: 150
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,150,2432.89,150000,0.56,6,-13,888.06


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 151000
  custom_metrics: {}
  date: 2021-09-18_16-47-38
  done: false
  episode_len_mean: 888.06
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.57
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 165
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8749460524982877
          entropy_coeff: 0.009999999999999998
          kl: 0.006071861527260363
          policy_loss: -0.03626537521680196
          total_loss: -0.01067571027411355
          vf_explained_var: 0.8743882179260254
          vf_loss: 0.03742289523490601
    num_agent_steps_sampled: 151000
    num_agent_steps_trained: 151000
    num_steps_sampled: 151000
    num_steps_trained: 151000
  iterations_since_restore: 151
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,151,2443.06,151000,0.57,6,-13,888.06




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 152000
  custom_metrics: {}
  date: 2021-09-18_16-48-26
  done: false
  episode_len_mean: 883.81
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.61
  episode_reward_min: -13.0
  episodes_this_iter: 2
  episodes_total: 167
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.75989691151513
          entropy_coeff: 0.009999999999999998
          kl: 0.008386945768045292
          policy_loss: -0.15538976076576444
          total_loss: -0.018320794155200323
          vf_explained_var: 0.6711099147796631
          vf_loss: 0.1451146756609281
    num_agent_steps_sampled: 152000
    num_agent_steps_trained: 152000
    num_steps_sampled: 152000
    num_steps_trained: 152000
  iterations_since_restore: 152
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,152,2491.35,152000,0.61,6,-13,883.81




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 153000
  custom_metrics: {}
  date: 2021-09-18_16-49-23
  done: false
  episode_len_mean: 871.38
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.7
  episode_reward_min: -13.0
  episodes_this_iter: 2
  episodes_total: 169
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8834109160635206
          entropy_coeff: 0.009999999999999998
          kl: 0.007408183007859471
          policy_loss: -0.07139522226320373
          total_loss: 0.09120729371077485
          vf_explained_var: 0.8207893967628479
          vf_loss: 0.17299824169216058
    num_agent_steps_sampled: 153000
    num_agent_steps_trained: 153000
    num_steps_sampled: 153000
    num_steps_trained: 153000
  iterations_since_restore: 153
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,153,2548.01,153000,0.7,6,-13,871.38




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 154000
  custom_metrics: {}
  date: 2021-09-18_16-50-16
  done: false
  episode_len_mean: 859.44
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.8
  episode_reward_min: -13.0
  episodes_this_iter: 2
  episodes_total: 171
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8798991243044536
          entropy_coeff: 0.009999999999999998
          kl: 0.0060261885783590005
          policy_loss: -0.02189245865576797
          total_loss: 0.12231150538557106
          vf_explained_var: 0.8825876712799072
          vf_loss: 0.1561387496482995
    num_agent_steps_sampled: 154000
    num_agent_steps_trained: 154000
    num_steps_sampled: 154000
    num_steps_trained: 154000
  iterations_since_restore: 154
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,154,2601.07,154000,0.8,6,-13,859.44




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 155000
  custom_metrics: {}
  date: 2021-09-18_16-51-13
  done: false
  episode_len_mean: 852.69
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 0.91
  episode_reward_min: -13.0
  episodes_this_iter: 2
  episodes_total: 173
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7617005056805082
          entropy_coeff: 0.009999999999999998
          kl: 0.008118649407628473
          policy_loss: 0.030389835975236364
          total_loss: 0.12924490177796946
          vf_explained_var: 0.5927925705909729
          vf_loss: 0.10722442016833358
    num_agent_steps_sampled: 155000
    num_agent_steps_trained: 155000
    num_steps_sampled: 155000
    num_steps_trained: 155000
  iterations_since_restore: 155
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,155,2657.37,155000,0.91,6,-13,852.69




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 156000
  custom_metrics: {}
  date: 2021-09-18_16-51-43
  done: false
  episode_len_mean: 845.9
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 1.04
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 174
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8401538875367907
          entropy_coeff: 0.009999999999999998
          kl: 0.008183155886859122
          policy_loss: -0.19643741771578788
          total_loss: -0.050157892372873096
          vf_explained_var: 0.7532452940940857
          vf_loss: 0.1553599375817511
    num_agent_steps_sampled: 156000
    num_agent_steps_trained: 156000
    num_steps_sampled: 156000
    num_steps_trained: 156000
  iterations_since_restore: 156
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,156,2687.95,156000,1.04,6,-13,845.9




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 157000
  custom_metrics: {}
  date: 2021-09-18_16-52-23
  done: false
  episode_len_mean: 839.68
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 1.12
  episode_reward_min: -13.0
  episodes_this_iter: 2
  episodes_total: 176
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.8810569723447164
          entropy_coeff: 0.009999999999999998
          kl: 0.013687265136887098
          policy_loss: -0.08840791814857059
          total_loss: -0.010946833259529538
          vf_explained_var: 0.929923415184021
          vf_loss: 0.08068100506853726
    num_agent_steps_sampled: 157000
    num_agent_steps_trained: 157000
    num_steps_sampled: 157000
    num_steps_trained: 157000
  iterations_since_restore: 157
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,157,2727.58,157000,1.12,6,-13,839.68




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 158000
  custom_metrics: {}
  date: 2021-09-18_16-53-25
  done: false
  episode_len_mean: 833.12
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 1.22
  episode_reward_min: -13.0
  episodes_this_iter: 2
  episodes_total: 178
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7138471338484023
          entropy_coeff: 0.009999999999999998
          kl: 0.006734503890565533
          policy_loss: 0.12482104918195142
          total_loss: 0.19894519589013523
          vf_explained_var: 0.8533993363380432
          vf_loss: 0.08359159684429567
    num_agent_steps_sampled: 158000
    num_agent_steps_trained: 158000
    num_steps_sampled: 158000
    num_steps_trained: 158000
  iterations_since_restore: 158
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,158,2789.6,158000,1.22,6,-13,833.12


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 159000
  custom_metrics: {}
  date: 2021-09-18_16-53-35
  done: false
  episode_len_mean: 833.12
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 1.24
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 179
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.794273070494334
          entropy_coeff: 0.009999999999999998
          kl: 0.005893443899210836
          policy_loss: 0.007525887423091465
          total_loss: 0.10130260437726975
          vf_explained_var: 0.783203125
          vf_loss: 0.10500644704120027
    num_agent_steps_sampled: 159000
    num_agent_steps_trained: 159000
    num_steps_sampled: 159000
    num_steps_trained: 159000
  iterations_since_restore: 159
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,159,2799.79,159000,1.24,6,-13,833.12




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 160000
  custom_metrics: {}
  date: 2021-09-18_16-54-21
  done: false
  episode_len_mean: 832.02
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 1.3
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 180
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.6342926621437073
          entropy_coeff: 0.009999999999999998
          kl: 0.003739820904205102
          policy_loss: 0.1809181135561731
          total_loss: 0.18751655634906556
          vf_explained_var: 0.9270432591438293
          vf_loss: 0.0186814796179533
    num_agent_steps_sampled: 160000
    num_agent_steps_trained: 160000
    num_steps_sampled: 160000
    num_steps_trained: 160000
  iterations_since_restore: 160
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,160,2845.77,160000,1.3,6,-13,832.02




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 161000
  custom_metrics: {}
  date: 2021-09-18_16-55-36
  done: false
  episode_len_mean: 814.3
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 1.44
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 183
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9244172043270535
          entropy_coeff: 0.009999999999999998
          kl: 0.007875726779798666
          policy_loss: -0.035176515248086716
          total_loss: 0.0820302085330089
          vf_explained_var: 0.8855911493301392
          vf_loss: 0.13196542635560035
    num_agent_steps_sampled: 161000
    num_agent_steps_trained: 161000
    num_steps_sampled: 161000
    num_steps_trained: 161000
  iterations_since_restore: 161
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,161,2921.08,161000,1.44,6,-13,814.3




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 162000
  custom_metrics: {}
  date: 2021-09-18_16-56-33
  done: false
  episode_len_mean: 803.06
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 1.54
  episode_reward_min: -13.0
  episodes_this_iter: 2
  episodes_total: 185
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6303341680102879
          entropy_coeff: 0.009999999999999998
          kl: 0.011162457059279932
          policy_loss: 0.0010445382859971789
          total_loss: 0.41250074967328043
          vf_explained_var: 0.5511208176612854
          vf_loss: 0.42140218048459954
    num_agent_steps_sampled: 162000
    num_agent_steps_trained: 162000
    num_steps_sampled: 162000
    num_steps_trained: 162000
  iterations_since_restore: 162
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,162,2977.8,162000,1.54,6,-13,803.06




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 163000
  custom_metrics: {}
  date: 2021-09-18_16-57-46
  done: false
  episode_len_mean: 790.11
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 1.63
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 188
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.692093887594011
          entropy_coeff: 0.009999999999999998
          kl: 0.014095249437662084
          policy_loss: -0.07403871284590827
          total_loss: 0.11390207496782144
          vf_explained_var: 0.871947169303894
          vf_loss: 0.1968340398122867
    num_agent_steps_sampled: 163000
    num_agent_steps_trained: 163000
    num_steps_sampled: 163000
    num_steps_trained: 163000
  iterations_since_restore: 163
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,163,3051.12,163000,1.63,6,-13,790.11


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 164000
  custom_metrics: {}
  date: 2021-09-18_16-57-58
  done: false
  episode_len_mean: 790.11
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 1.65
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 189
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.838579601711697
          entropy_coeff: 0.009999999999999998
          kl: 0.016392065719166727
          policy_loss: 0.021407435172133976
          total_loss: 0.08327972247368759
          vf_explained_var: 0.8261588215827942
          vf_loss: 0.07092228877461619
    num_agent_steps_sampled: 164000
    num_agent_steps_trained: 164000
    num_steps_sampled: 164000
    num_steps_trained: 164000
  iterations_since_restore: 164
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,164,3062.34,164000,1.65,6,-13,790.11


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 165000
  custom_metrics: {}
  date: 2021-09-18_16-58-07
  done: false
  episode_len_mean: 790.11
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 1.65
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 190
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9795056382815044
          entropy_coeff: 0.009999999999999998
          kl: 0.013890278851710312
          policy_loss: 0.11365845211678081
          total_loss: 0.16644094495309725
          vf_explained_var: 0.2699771523475647
          vf_loss: 0.06466660211897558
    num_agent_steps_sampled: 165000
    num_agent_steps_trained: 165000
    num_steps_sampled: 165000
    num_steps_trained: 165000
  iterations_since_restore: 165
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,165,3071.62,165000,1.65,6,-13,790.11




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 166000
  custom_metrics: {}
  date: 2021-09-18_16-59-36
  done: false
  episode_len_mean: 770.87
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 1.78
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 193
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8461644728978475
          entropy_coeff: 0.009999999999999998
          kl: 0.005590380375256032
          policy_loss: -0.09726635076933438
          total_loss: 0.041361833405163556
          vf_explained_var: 0.9254485964775085
          vf_loss: 0.15390593463348018
    num_agent_steps_sampled: 166000
    num_agent_steps_trained: 166000
    num_steps_sampled: 166000
    num_steps_trained: 166000
  iterations_since_restore: 166
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,166,3160.53,166000,1.78,6,-13,770.87




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 167000
  custom_metrics: {}
  date: 2021-09-18_17-00-09
  done: false
  episode_len_mean: 763.61
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 1.85
  episode_reward_min: -13.0
  episodes_this_iter: 1
  episodes_total: 194
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.758978980117374
          entropy_coeff: 0.009999999999999998
          kl: 0.01028616144408166
          policy_loss: -0.15613119983010823
          total_loss: -0.03953653987911013
          vf_explained_var: 0.8627466559410095
          vf_loss: 0.12832616290284526
    num_agent_steps_sampled: 167000
    num_agent_steps_trained: 167000
    num_steps_sampled: 167000
    num_steps_trained: 167000
  iterations_since_restore: 167
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,167,3193.12,167000,1.85,6,-13,763.61




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 168000
  custom_metrics: {}
  date: 2021-09-18_17-00-56
  done: false
  episode_len_mean: 758.98
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 1.84
  episode_reward_min: -13.0
  episodes_this_iter: 3
  episodes_total: 197
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8349338889122009
          entropy_coeff: 0.009999999999999998
          kl: 0.01853024752671995
          policy_loss: 0.0400211734076341
          total_loss: 0.15806392634080516
          vf_explained_var: 0.8826656937599182
          vf_loss: 0.12583853668636746
    num_agent_steps_sampled: 168000
    num_agent_steps_trained: 168000
    num_steps_sampled: 168000
    num_steps_trained: 168000
  iterations_since_restore: 168
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,168,3240.9,168000,1.84,6,-13,758.98




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 169000
  custom_metrics: {}
  date: 2021-09-18_17-02-31
  done: false
  episode_len_mean: 731.51
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 2.23
  episode_reward_min: -9.0
  episodes_this_iter: 4
  episodes_total: 201
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.915089217821757
          entropy_coeff: 0.009999999999999998
          kl: 0.0067748796043572805
          policy_loss: -0.15368190198722814
          total_loss: -0.06533215484685367
          vf_explained_var: 0.9522221088409424
          vf_loss: 0.10364213209185336
    num_agent_steps_sampled: 169000
    num_agent_steps_trained: 169000
    num_steps_sampled: 169000
    num_steps_trained: 169000
  iterations_since_restore: 169
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,169,3335.46,169000,2.23,6,-9,731.51




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 170000
  custom_metrics: {}
  date: 2021-09-18_17-03-00
  done: false
  episode_len_mean: 725.08
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 2.29
  episode_reward_min: -9.0
  episodes_this_iter: 1
  episodes_total: 202
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.544854720433553
          entropy_coeff: 0.009999999999999998
          kl: 0.008589480259011623
          policy_loss: -0.05817587441868252
          total_loss: 0.1222751607083612
          vf_explained_var: 0.8872463703155518
          vf_loss: 0.19100760037286413
    num_agent_steps_sampled: 170000
    num_agent_steps_trained: 170000
    num_steps_sampled: 170000
    num_steps_trained: 170000
  iterations_since_restore: 170
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,170,3364.2,170000,2.29,6,-9,725.08




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 171000
  custom_metrics: {}
  date: 2021-09-18_17-03-42
  done: false
  episode_len_mean: 722.96
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 2.28
  episode_reward_min: -9.0
  episodes_this_iter: 2
  episodes_total: 204
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5747882604599
          entropy_coeff: 0.009999999999999998
          kl: 0.015770064533701837
          policy_loss: -0.1064097781976064
          total_loss: -0.023952368771036466
          vf_explained_var: 0.6319507956504822
          vf_loss: 0.08922375316421191
    num_agent_steps_sampled: 171000
    num_agent_steps_trained: 171000
    num_steps_sampled: 171000
    num_steps_trained: 171000
  iterations_since_restore: 171
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,171,3406.14,171000,2.28,6,-9,722.96




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 172000
  custom_metrics: {}
  date: 2021-09-18_17-04-21
  done: false
  episode_len_mean: 723.79
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 2.33
  episode_reward_min: -9.0
  episodes_this_iter: 2
  episodes_total: 206
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5695312499999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9157707889874775
          entropy_coeff: 0.009999999999999998
          kl: 0.004450683330219757
          policy_loss: -0.0977612154972222
          total_loss: -0.0905511924996972
          vf_explained_var: 0.7008118629455566
          vf_loss: 0.02383292725102769
    num_agent_steps_sampled: 172000
    num_agent_steps_trained: 172000
    num_steps_sampled: 172000
    num_steps_trained: 172000
  iterations_since_restore: 172
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,172,3445.62,172000,2.33,6,-9,723.79




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 173000
  custom_metrics: {}
  date: 2021-09-18_17-04-51
  done: false
  episode_len_mean: 716.38
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 2.4
  episode_reward_min: -9.0
  episodes_this_iter: 1
  episodes_total: 207
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.7972085224257575
          entropy_coeff: 0.009999999999999998
          kl: 0.017422025236148532
          policy_loss: -0.0691113336218728
          total_loss: 0.004741486575868394
          vf_explained_var: 0.8218594193458557
          vf_loss: 0.08686370932393604
    num_agent_steps_sampled: 173000
    num_agent_steps_trained: 173000
    num_steps_sampled: 173000
    num_steps_trained: 173000
  iterations_since_restore: 173
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,173,3475.2,173000,2.4,6,-9,716.38




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 174000
  custom_metrics: {}
  date: 2021-09-18_17-05-34
  done: false
  episode_len_mean: 721.4
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 2.36
  episode_reward_min: -9.0
  episodes_this_iter: 2
  episodes_total: 209
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2847656249999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5070011602507698
          entropy_coeff: 0.009999999999999998
          kl: 0.024836788024234824
          policy_loss: -0.03943040370941162
          total_loss: 0.24264829386439588
          vf_explained_var: 0.5102092623710632
          vf_loss: 0.29007604662328956
    num_agent_steps_sampled: 174000
    num_agent_steps_trained: 174000
    num_steps_sampled: 174000
    num_steps_trained: 174000
  iterations_since_restore: 174
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,174,3518.18,174000,2.36,6,-9,721.4




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 175000
  custom_metrics: {}
  date: 2021-09-18_17-06-07
  done: false
  episode_len_mean: 720.02
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 2.38
  episode_reward_min: -9.0
  episodes_this_iter: 1
  episodes_total: 210
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.7433408564991422
          entropy_coeff: 0.009999999999999998
          kl: 0.011898856446415746
          policy_loss: -0.05768286453353034
          total_loss: 0.1880754515528679
          vf_explained_var: 0.7081406712532043
          vf_loss: 0.2581091456943088
    num_agent_steps_sampled: 175000
    num_agent_steps_trained: 175000
    num_steps_sampled: 175000
    num_steps_trained: 175000
  iterations_since_restore: 175
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,175,3551.04,175000,2.38,6,-9,720.02




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 176000
  custom_metrics: {}
  date: 2021-09-18_17-06-44
  done: false
  episode_len_mean: 712.92
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 2.4
  episode_reward_min: -9.0
  episodes_this_iter: 2
  episodes_total: 212
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.9400378651089139
          entropy_coeff: 0.009999999999999998
          kl: 0.010762229588081595
          policy_loss: -0.13504240156875716
          total_loss: -0.07853739427195655
          vf_explained_var: 0.6878012418746948
          vf_loss: 0.07130831362058719
    num_agent_steps_sampled: 176000
    num_agent_steps_trained: 176000
    num_steps_sampled: 176000
    num_steps_trained: 176000
  iterations_since_restore: 176
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,176,3588.67,176000,2.4,6,-9,712.92


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 177000
  custom_metrics: {}
  date: 2021-09-18_17-06-55
  done: false
  episode_len_mean: 712.92
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 2.4
  episode_reward_min: -9.0
  episodes_this_iter: 1
  episodes_total: 213
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.075808718469408
          entropy_coeff: 0.009999999999999998
          kl: 0.006186561377982063
          policy_loss: -0.008801175819502936
          total_loss: 0.006829591364496284
          vf_explained_var: 0.5639796853065491
          vf_loss: 0.03374627354658312
    num_agent_steps_sampled: 177000
    num_agent_steps_trained: 177000
    num_steps_sampled: 177000
    num_steps_trained: 177000
  iterations_since_restore: 177
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,177,3599.65,177000,2.4,6,-9,712.92


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 178000
  custom_metrics: {}
  date: 2021-09-18_17-07-05
  done: false
  episode_len_mean: 716.23
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 2.39
  episode_reward_min: -9.0
  episodes_this_iter: 1
  episodes_total: 214
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.8662616464826796
          entropy_coeff: 0.009999999999999998
          kl: 0.008820844982727137
          policy_loss: 0.025326057233744197
          total_loss: 0.08818759491874112
          vf_explained_var: 0.1400085687637329
          vf_loss: 0.07775633947716819
    num_agent_steps_sampled: 178000
    num_agent_steps_trained: 178000
    num_steps_sampled: 178000
    num_steps_trained: 178000
  iterations_since_restore: 178
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,178,3609.09,178000,2.39,6,-9,716.23


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 179000
  custom_metrics: {}
  date: 2021-09-18_17-07-14
  done: false
  episode_len_mean: 716.23
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 2.42
  episode_reward_min: -9.0
  episodes_this_iter: 1
  episodes_total: 215
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.0988161166508994
          entropy_coeff: 0.009999999999999998
          kl: 0.007009349464907929
          policy_loss: -0.03347792625427246
          total_loss: -0.023767783637675975
          vf_explained_var: 0.2672085762023926
          vf_loss: 0.027704269526940252
    num_agent_steps_sampled: 179000
    num_agent_steps_trained: 179000
    num_steps_sampled: 179000
    num_steps_trained: 179000
  iterations_since_restore: 179
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,179,3618.23,179000,2.42,6,-9,716.23




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 180000
  custom_metrics: {}
  date: 2021-09-18_17-08-18
  done: false
  episode_len_mean: 699.22
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 2.56
  episode_reward_min: -9.0
  episodes_this_iter: 3
  episodes_total: 218
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.0208683292071026
          entropy_coeff: 0.009999999999999998
          kl: 0.011599847719199531
          policy_loss: 0.03979698005649779
          total_loss: 0.0651574685341782
          vf_explained_var: 0.00904154684394598
          vf_loss: 0.04061431679615958
    num_agent_steps_sampled: 180000
    num_agent_steps_trained: 180000
    num_steps_sampled: 180000
    num_steps_trained: 180000
  iterations_since_restore: 180
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,180,3682.56,180000,2.56,6,-9,699.22


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 181000
  custom_metrics: {}
  date: 2021-09-18_17-08-29
  done: false
  episode_len_mean: 699.22
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 2.6
  episode_reward_min: -9.0
  episodes_this_iter: 1
  episodes_total: 219
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 2.0346039123005335
          entropy_coeff: 0.009999999999999998
          kl: 0.013050374171296708
          policy_loss: -0.03269350628058116
          total_loss: -0.002158361714747217
          vf_explained_var: 0.49769675731658936
          vf_loss: 0.045306736996604334
    num_agent_steps_sampled: 181000
    num_agent_steps_trained: 181000
    num_steps_sampled: 181000
    num_steps_trained: 181000
  iterations_since_restore: 181
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,181,3693.37,181000,2.6,6,-9,699.22


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 182000
  custom_metrics: {}
  date: 2021-09-18_17-08-39
  done: false
  episode_len_mean: 699.22
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 2.6
  episode_reward_min: -9.0
  episodes_this_iter: 1
  episodes_total: 220
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 0.49249134759108226
          entropy_coeff: 0.009999999999999998
          kl: 0.0057653664357538836
          policy_loss: -0.015202438665760888
          total_loss: 0.3116461666093932
          vf_explained_var: 0.5596858263015747
          vf_loss: 0.3293108511302206
    num_agent_steps_sampled: 182000
    num_agent_steps_trained: 182000
    num_steps_sampled: 182000
    num_steps_trained: 182000
  iterations_since_restore: 182
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,182,3703.63,182000,2.6,6,-9,699.22




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 183000
  custom_metrics: {}
  date: 2021-09-18_17-09-54
  done: false
  episode_len_mean: 687.43
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 2.68
  episode_reward_min: -9.0
  episodes_this_iter: 2
  episodes_total: 222
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.4008800321155124
          entropy_coeff: 0.009999999999999998
          kl: 0.017840069433194668
          policy_loss: 0.019143868486086527
          total_loss: 0.223934918973181
          vf_explained_var: 0.6665905714035034
          vf_loss: 0.21117949436108271
    num_agent_steps_sampled: 183000
    num_agent_steps_trained: 183000
    num_steps_sampled: 183000
    num_steps_trained: 183000
  iterations_since_restore: 183
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,183,3778.08,183000,2.68,6,-9,687.43




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 184000
  custom_metrics: {}
  date: 2021-09-18_17-10-35
  done: false
  episode_len_mean: 679.8
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 2.74
  episode_reward_min: -9.0
  episodes_this_iter: 2
  episodes_total: 224
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.7689861893653869
          entropy_coeff: 0.009999999999999998
          kl: 0.008134382154812828
          policy_loss: 0.19643797626097997
          total_loss: 0.23242118656635286
          vf_explained_var: 0.34810808300971985
          vf_loss: 0.05019848552635974
    num_agent_steps_sampled: 184000
    num_agent_steps_trained: 184000
    num_steps_sampled: 184000
    num_steps_trained: 184000
  iterations_since_restore: 184
  node_ip: 192.16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,184,3819.44,184000,2.74,6,-9,679.8




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 185000
  custom_metrics: {}
  date: 2021-09-18_17-12-10
  done: false
  episode_len_mean: 649.07
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 2.95
  episode_reward_min: -9.0
  episodes_this_iter: 4
  episodes_total: 228
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.2379867706033918
          entropy_coeff: 0.009999999999999998
          kl: 0.0069014641965834496
          policy_loss: -0.046531095852454504
          total_loss: 0.12407240428858334
          vf_explained_var: 0.891657829284668
          vf_loss: 0.1800354147122966
    num_agent_steps_sampled: 185000
    num_agent_steps_trained: 185000
    num_steps_sampled: 185000
    num_steps_trained: 185000
  iterations_since_restore: 185
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,185,3914.24,185000,2.95,6,-9,649.07




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 186000
  custom_metrics: {}
  date: 2021-09-18_17-13-03
  done: false
  episode_len_mean: 633.4
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 3.05
  episode_reward_min: -9.0
  episodes_this_iter: 2
  episodes_total: 230
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.0790785498089261
          entropy_coeff: 0.009999999999999998
          kl: 0.009672157183915307
          policy_loss: -0.08380910985999637
          total_loss: 0.1503519655101829
          vf_explained_var: 0.6803658604621887
          vf_loss: 0.24082041788432335
    num_agent_steps_sampled: 186000
    num_agent_steps_trained: 186000
    num_steps_sampled: 186000
    num_steps_trained: 186000
  iterations_since_restore: 186
  node_ip: 192.168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,186,3966.78,186000,3.05,6,-9,633.4


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 187000
  custom_metrics: {}
  date: 2021-09-18_17-13-13
  done: false
  episode_len_mean: 633.4
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 3.06
  episode_reward_min: -9.0
  episodes_this_iter: 1
  episodes_total: 231
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.649879519144694
          entropy_coeff: 0.009999999999999998
          kl: 0.00963422105832449
          policy_loss: 0.030712714087631966
          total_loss: 0.14195777278186547
          vf_explained_var: 0.6937512159347534
          vf_loss: 0.12362860788901647
    num_agent_steps_sampled: 187000
    num_agent_steps_trained: 187000
    num_steps_sampled: 187000
    num_steps_trained: 187000
  iterations_since_restore: 187
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,187,3977,187000,3.06,6,-9,633.4




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 188000
  custom_metrics: {}
  date: 2021-09-18_17-14-01
  done: false
  episode_len_mean: 633.71
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 3.04
  episode_reward_min: -9.0
  episodes_this_iter: 2
  episodes_total: 233
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.6269322090678744
          entropy_coeff: 0.009999999999999998
          kl: 0.012198981513686835
          policy_loss: -0.12727104210191303
          total_loss: -0.0417993475165632
          vf_explained_var: 0.7045525312423706
          vf_loss: 0.09653024209870233
    num_agent_steps_sampled: 188000
    num_agent_steps_trained: 188000
    num_steps_sampled: 188000
    num_steps_trained: 188000
  iterations_since_restore: 188
  node_ip: 192.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,188,4025.52,188000,3.04,6,-9,633.71


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 189000
  custom_metrics: {}
  date: 2021-09-18_17-14-13
  done: false
  episode_len_mean: 633.71
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 3.04
  episode_reward_min: -9.0
  episodes_this_iter: 1
  episodes_total: 234
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.4969156410959032
          entropy_coeff: 0.009999999999999998
          kl: 0.01579447844483795
          policy_loss: -0.018381385091278287
          total_loss: -0.00461319817437066
          vf_explained_var: 0.10751919448375702
          vf_loss: 0.021990757665803863
    num_agent_steps_sampled: 189000
    num_agent_steps_trained: 189000
    num_steps_sampled: 189000
    num_steps_trained: 189000
  iterations_since_restore: 189
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,189,4036.93,189000,3.04,6,-9,633.71


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 190000
  custom_metrics: {}
  date: 2021-09-18_17-14-22
  done: false
  episode_len_mean: 640.52
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 2.97
  episode_reward_min: -9.0
  episodes_this_iter: 1
  episodes_total: 235
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4271484375
          cur_lr: 5.000000000000001e-05
          entropy: 1.7821719633208382
          entropy_coeff: 0.009999999999999998
          kl: 0.02060451635627795
          policy_loss: -0.03940156002839406
          total_loss: -0.023951968053976695
          vf_explained_var: 0.6233278512954712
          vf_loss: 0.024470126809966235
    num_agent_steps_sampled: 190000
    num_agent_steps_trained: 190000
    num_steps_sampled: 190000
    num_steps_trained: 190000
  iterations_since_restore: 190
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,190,4046.5,190000,2.97,6,-9,640.52




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 191000
  custom_metrics: {}
  date: 2021-09-18_17-15-07
  done: false
  episode_len_mean: 640.43
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 2.89
  episode_reward_min: -9.0
  episodes_this_iter: 2
  episodes_total: 237
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6407226562500001
          cur_lr: 5.000000000000001e-05
          entropy: 1.6578647202915615
          entropy_coeff: 0.009999999999999998
          kl: 0.0037328081627151337
          policy_loss: 0.09494381739447515
          total_loss: 0.12942702358381616
          vf_explained_var: 0.22548183798789978
          vf_loss: 0.04867015728862801
    num_agent_steps_sampled: 191000
    num_agent_steps_trained: 191000
    num_steps_sampled: 191000
    num_steps_trained: 191000
  iterations_since_restore: 191
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,191,4091.44,191000,2.89,6,-9,640.43


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 192000
  custom_metrics: {}
  date: 2021-09-18_17-15-19
  done: false
  episode_len_mean: 640.43
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 2.86
  episode_reward_min: -9.0
  episodes_this_iter: 1
  episodes_total: 238
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32036132812500007
          cur_lr: 5.000000000000001e-05
          entropy: 1.7405540148417156
          entropy_coeff: 0.009999999999999998
          kl: 0.004684777227810086
          policy_loss: -0.19435215294361113
          total_loss: -0.20735148357020483
          vf_explained_var: -0.07796776294708252
          vf_loss: 0.002905387455312949
    num_agent_steps_sampled: 192000
    num_agent_steps_trained: 192000
    num_steps_sampled: 192000
    num_steps_trained: 192000
  iterations_since_restore: 192
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,192,4102.68,192000,2.86,6,-9,640.43


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 193000
  custom_metrics: {}
  date: 2021-09-18_17-15-28
  done: false
  episode_len_mean: 642.74
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 2.83
  episode_reward_min: -9.0
  episodes_this_iter: 1
  episodes_total: 239
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16018066406250003
          cur_lr: 5.000000000000001e-05
          entropy: 1.7893300109439425
          entropy_coeff: 0.009999999999999998
          kl: 0.005882607722316714
          policy_loss: -0.06432106474207508
          total_loss: -0.0792608506563637
          vf_explained_var: -0.02352118492126465
          vf_loss: 0.002011234706798051
    num_agent_steps_sampled: 193000
    num_agent_steps_trained: 193000
    num_steps_sampled: 193000
    num_steps_trained: 193000
  iterations_since_restore: 193
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,193,4112.27,193000,2.83,6,-9,642.74




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 194000
  custom_metrics: {}
  date: 2021-09-18_17-15-56
  done: false
  episode_len_mean: 634.85
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 2.9
  episode_reward_min: -9.0
  episodes_this_iter: 1
  episodes_total: 240
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16018066406250003
          cur_lr: 5.000000000000001e-05
          entropy: 1.9282676299413046
          entropy_coeff: 0.009999999999999998
          kl: 0.010421831647104243
          policy_loss: -0.051400652424328855
          total_loss: 0.020517359218663638
          vf_explained_var: 0.38880980014801025
          vf_loss: 0.08953131292429235
    num_agent_steps_sampled: 194000
    num_agent_steps_trained: 194000
    num_steps_sampled: 194000
    num_steps_trained: 194000
  iterations_since_restore: 194
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,194,4140.13,194000,2.9,6,-9,634.85


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 195000
  custom_metrics: {}
  date: 2021-09-18_17-16-05
  done: false
  episode_len_mean: 639.22
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 2.86
  episode_reward_min: -9.0
  episodes_this_iter: 1
  episodes_total: 241
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16018066406250003
          cur_lr: 5.000000000000001e-05
          entropy: 1.9660815411143833
          entropy_coeff: 0.009999999999999998
          kl: 0.010881198282703864
          policy_loss: -0.02189102123181025
          total_loss: -0.027002530131075116
          vf_explained_var: 0.40650150179862976
          vf_loss: 0.01280634422113912
    num_agent_steps_sampled: 195000
    num_agent_steps_trained: 195000
    num_steps_sampled: 195000
    num_steps_trained: 195000
  iterations_since_restore: 195
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,195,4149.43,195000,2.86,6,-9,639.22




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 196000
  custom_metrics: {}
  date: 2021-09-18_17-16-32
  done: false
  episode_len_mean: 640.77
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 2.86
  episode_reward_min: -9.0
  episodes_this_iter: 2
  episodes_total: 243
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16018066406250003
          cur_lr: 5.000000000000001e-05
          entropy: 1.619124382072025
          entropy_coeff: 0.009999999999999998
          kl: 0.01565568062296268
          policy_loss: -0.04904334396123886
          total_loss: 0.19642961911029286
          vf_explained_var: 0.23048150539398193
          vf_loss: 0.2591564692556858
    num_agent_steps_sampled: 196000
    num_agent_steps_trained: 196000
    num_steps_sampled: 196000
    num_steps_trained: 196000
  iterations_since_restore: 196
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,196,4176.02,196000,2.86,6,-9,640.77


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 197000
  custom_metrics: {}
  date: 2021-09-18_17-16-43
  done: false
  episode_len_mean: 645.9
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 2.88
  episode_reward_min: -9.0
  episodes_this_iter: 1
  episodes_total: 244
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16018066406250003
          cur_lr: 5.000000000000001e-05
          entropy: 1.61179246240192
          entropy_coeff: 0.009999999999999998
          kl: 0.014119965968140047
          policy_loss: 0.006219960418012407
          total_loss: 0.00839064617951711
          vf_explained_var: 0.8183921575546265
          vf_loss: 0.016026866829229727
    num_agent_steps_sampled: 197000
    num_agent_steps_trained: 197000
    num_steps_sampled: 197000
    num_steps_trained: 197000
  iterations_since_restore: 197
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,197,4186.57,197000,2.88,6,-9,645.9


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 198000
  custom_metrics: {}
  date: 2021-09-18_17-16-52
  done: false
  episode_len_mean: 645.9
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 2.9
  episode_reward_min: -9.0
  episodes_this_iter: 1
  episodes_total: 245
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16018066406250003
          cur_lr: 5.000000000000001e-05
          entropy: 1.7320823404524062
          entropy_coeff: 0.009999999999999998
          kl: 0.011521203077803103
          policy_loss: -0.02966531291604042
          total_loss: -0.01225369738207923
          vf_explained_var: 0.6073890924453735
          vf_loss: 0.032886964399626274
    num_agent_steps_sampled: 198000
    num_agent_steps_trained: 198000
    num_steps_sampled: 198000
    num_steps_trained: 198000
  iterations_since_restore: 198
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,198,4195.74,198000,2.9,6,-9,645.9


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 199000
  custom_metrics: {}
  date: 2021-09-18_17-17-01
  done: false
  episode_len_mean: 645.9
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 2.95
  episode_reward_min: -9.0
  episodes_this_iter: 1
  episodes_total: 246
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16018066406250003
          cur_lr: 5.000000000000001e-05
          entropy: 1.7770652068985833
          entropy_coeff: 0.009999999999999998
          kl: 0.010651902790463394
          policy_loss: -0.052710253579749004
          total_loss: -0.025014198819796243
          vf_explained_var: 0.6572611331939697
          vf_loss: 0.04376048035143564
    num_agent_steps_sampled: 199000
    num_agent_steps_trained: 199000
    num_steps_sampled: 199000
    num_steps_trained: 199000
  iterations_since_restore: 199
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,199,4205.05,199000,2.95,6,-9,645.9




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 200000
  custom_metrics: {}
  date: 2021-09-18_17-18-03
  done: false
  episode_len_mean: 630.56
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 3.18
  episode_reward_min: -9.0
  episodes_this_iter: 3
  episodes_total: 249
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16018066406250003
          cur_lr: 5.000000000000001e-05
          entropy: 1.5940816150771246
          entropy_coeff: 0.009999999999999998
          kl: 0.022055570853249544
          policy_loss: 0.14354713981350262
          total_loss: 0.21246946433352099
          vf_explained_var: 0.30215269327163696
          vf_loss: 0.08133026512676023
    num_agent_steps_sampled: 200000
    num_agent_steps_trained: 200000
    num_steps_sampled: 200000
    num_steps_trained: 200000
  iterations_since_restore: 200
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,200,4266.46,200000,3.18,6,-9,630.56




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 201000
  custom_metrics: {}
  date: 2021-09-18_17-18-33
  done: false
  episode_len_mean: 622.58
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 3.25
  episode_reward_min: -9.0
  episodes_this_iter: 1
  episodes_total: 250
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2402709960937499
          cur_lr: 5.000000000000001e-05
          entropy: 1.6753383941120572
          entropy_coeff: 0.009999999999999998
          kl: 0.016799043069577652
          policy_loss: -0.026868377543158
          total_loss: 0.18969121442900763
          vf_explained_var: 0.5132055282592773
          vf_loss: 0.22927665549019974
    num_agent_steps_sampled: 201000
    num_agent_steps_trained: 201000
    num_steps_sampled: 201000
    num_steps_trained: 201000
  iterations_since_restore: 201
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,201,4296.5,201000,3.25,6,-9,622.58




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 202000
  custom_metrics: {}
  date: 2021-09-18_17-19-14
  done: false
  episode_len_mean: 614.59
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 3.38
  episode_reward_min: -6.0
  episodes_this_iter: 2
  episodes_total: 252
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2402709960937499
          cur_lr: 5.000000000000001e-05
          entropy: 1.5903939829932319
          entropy_coeff: 0.009999999999999998
          kl: 0.024218770579579836
          policy_loss: 0.03402204646004571
          total_loss: 0.195796297573381
          vf_explained_var: 0.6170183420181274
          vf_loss: 0.1718591243856483
    num_agent_steps_sampled: 202000
    num_agent_steps_trained: 202000
    num_steps_sampled: 202000
    num_steps_trained: 202000
  iterations_since_restore: 202
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,202,4338.02,202000,3.38,6,-6,614.59


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 203000
  custom_metrics: {}
  date: 2021-09-18_17-19-26
  done: false
  episode_len_mean: 619.62
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 3.33
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 253
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3604064941406251
          cur_lr: 5.000000000000001e-05
          entropy: 1.9618438243865968
          entropy_coeff: 0.009999999999999998
          kl: 0.009801452746670868
          policy_loss: -0.000606101109749741
          total_loss: 0.07367879344771305
          vf_explained_var: 0.4964483976364136
          vf_loss: 0.09037082351278514
    num_agent_steps_sampled: 203000
    num_agent_steps_trained: 203000
    num_steps_sampled: 203000
    num_steps_trained: 203000
  iterations_since_restore: 203
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,203,4349.46,203000,3.33,6,-6,619.62




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 204000
  custom_metrics: {}
  date: 2021-09-18_17-20-14
  done: false
  episode_len_mean: 614.85
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 3.35
  episode_reward_min: -6.0
  episodes_this_iter: 2
  episodes_total: 255
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3604064941406251
          cur_lr: 5.000000000000001e-05
          entropy: 1.7570990019374424
          entropy_coeff: 0.009999999999999998
          kl: 0.010735574484949693
          policy_loss: 0.03095068521797657
          total_loss: 0.17167925246887736
          vf_explained_var: 0.7827915549278259
          vf_loss: 0.15443038727518998
    num_agent_steps_sampled: 204000
    num_agent_steps_trained: 204000
    num_steps_sampled: 204000
    num_steps_trained: 204000
  iterations_since_restore: 204
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,204,4397.52,204000,3.35,6,-6,614.85




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 205000
  custom_metrics: {}
  date: 2021-09-18_17-21-57
  done: false
  episode_len_mean: 585.41
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 3.51
  episode_reward_min: -6.0
  episodes_this_iter: 6
  episodes_total: 261
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3604064941406251
          cur_lr: 5.000000000000001e-05
          entropy: 1.785584114657508
          entropy_coeff: 0.009999999999999998
          kl: 0.00799766156627266
          policy_loss: -0.020339138474729325
          total_loss: 0.2270138586974806
          vf_explained_var: 0.7872968912124634
          vf_loss: 0.2623264300585207
    num_agent_steps_sampled: 205000
    num_agent_steps_trained: 205000
    num_steps_sampled: 205000
    num_steps_trained: 205000
  iterations_since_restore: 205
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,205,4500.82,205000,3.51,6,-6,585.41


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 206000
  custom_metrics: {}
  date: 2021-09-18_17-22-07
  done: false
  episode_len_mean: 585.41
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 3.51
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 262
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3604064941406251
          cur_lr: 5.000000000000001e-05
          entropy: 2.129924609926012
          entropy_coeff: 0.009999999999999998
          kl: 0.02179474124076049
          policy_loss: 0.04383539110422134
          total_loss: 0.1337899688217375
          vf_explained_var: -0.24238643050193787
          vf_loss: 0.10339885934841328
    num_agent_steps_sampled: 206000
    num_agent_steps_trained: 206000
    num_steps_sampled: 206000
    num_steps_trained: 206000
  iterations_since_restore: 206
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,206,4510.85,206000,3.51,6,-6,585.41


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 207000
  custom_metrics: {}
  date: 2021-09-18_17-22-16
  done: false
  episode_len_mean: 585.41
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 3.5
  episode_reward_min: -6.0
  episodes_this_iter: 1
  episodes_total: 263
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 2.207993619971805
          entropy_coeff: 0.009999999999999998
          kl: 0.00819208688011318
          policy_loss: 0.18092907042139106
          total_loss: 0.20946293990645143
          vf_explained_var: 0.5497071743011475
          vf_loss: 0.04618508404948645
    num_agent_steps_sampled: 207000
    num_agent_steps_trained: 207000
    num_steps_sampled: 207000
    num_steps_trained: 207000
  iterations_since_restore: 207
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,207,4520.08,207000,3.5,6,-6,585.41




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 208000
  custom_metrics: {}
  date: 2021-09-18_17-22-44
  done: false
  episode_len_mean: 575.92
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 3.64
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 265
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 2.077219041188558
          entropy_coeff: 0.009999999999999998
          kl: 0.006925587311254983
          policy_loss: 0.09331646172536744
          total_loss: 0.11170255665977796
          vf_explained_var: 0.5169402956962585
          vf_loss: 0.03541424489683575
    num_agent_steps_sampled: 208000
    num_agent_steps_trained: 208000
    num_steps_sampled: 208000
    num_steps_trained: 208000
  iterations_since_restore: 208
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,208,4547.51,208000,3.64,6,-3,575.92


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 209000
  custom_metrics: {}
  date: 2021-09-18_17-22-53
  done: false
  episode_len_mean: 575.92
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 3.61
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 266
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.9864775737126668
          entropy_coeff: 0.009999999999999998
          kl: 0.008373558479764023
          policy_loss: 0.022558186741338835
          total_loss: 0.08094161413609982
          vf_explained_var: 0.29159313440322876
          vf_loss: 0.07372137146174079
    num_agent_steps_sampled: 209000
    num_agent_steps_trained: 209000
    num_steps_sampled: 209000
    num_steps_trained: 209000
  iterations_since_restore: 209
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,209,4557.14,209000,3.61,6,-3,575.92




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 210000
  custom_metrics: {}
  date: 2021-09-18_17-24-08
  done: false
  episode_len_mean: 564.63
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 3.65
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 269
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 2.2119108504719205
          entropy_coeff: 0.009999999999999998
          kl: 0.012475185451601808
          policy_loss: -0.12273476057582432
          total_loss: 0.14312263909313414
          vf_explained_var: 0.28918394446372986
          vf_loss: 0.281232296447787
    num_agent_steps_sampled: 210000
    num_agent_steps_trained: 210000
    num_steps_sampled: 210000
    num_steps_trained: 210000
  iterations_since_restore: 210
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,210,4631.49,210000,3.65,6,-3,564.63




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 211000
  custom_metrics: {}
  date: 2021-09-18_17-25-02
  done: false
  episode_len_mean: 562.26
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 3.66
  episode_reward_min: -3.0
  episodes_this_iter: 3
  episodes_total: 272
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 2.2783604939778646
          entropy_coeff: 0.009999999999999998
          kl: 0.013672894119294288
          policy_loss: -0.07458805955118603
          total_loss: 0.26015892773866656
          vf_explained_var: 0.5558165907859802
          vf_loss: 0.3501388981938362
    num_agent_steps_sampled: 211000
    num_agent_steps_trained: 211000
    num_steps_sampled: 211000
    num_steps_trained: 211000
  iterations_since_restore: 211
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,211,4686.04,211000,3.66,6,-3,562.26


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 212000
  custom_metrics: {}
  date: 2021-09-18_17-25-14
  done: false
  episode_len_mean: 564.35
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 3.67
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 273
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 2.130074764622582
          entropy_coeff: 0.009999999999999998
          kl: 0.010504879534900249
          policy_loss: -0.012322091932098071
          total_loss: 0.05519568705931306
          vf_explained_var: 0.6274265050888062
          vf_loss: 0.08313948707655072
    num_agent_steps_sampled: 212000
    num_agent_steps_trained: 212000
    num_steps_sampled: 212000
    num_steps_trained: 212000
  iterations_since_restore: 212
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,212,4697.09,212000,3.67,6,-3,564.35




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 213000
  custom_metrics: {}
  date: 2021-09-18_17-26-02
  done: false
  episode_len_mean: 561.88
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 3.66
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 275
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 2.049455401632521
          entropy_coeff: 0.009999999999999998
          kl: 0.017188288833770424
          policy_loss: -0.007448598038819101
          total_loss: 0.37887141381700834
          vf_explained_var: 0.12696726620197296
          vf_loss: 0.3975224108952615
    num_agent_steps_sampled: 213000
    num_agent_steps_trained: 213000
    num_steps_sampled: 213000
    num_steps_trained: 213000
  iterations_since_restore: 213
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,213,4745.33,213000,3.66,6,-3,561.88




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 214000
  custom_metrics: {}
  date: 2021-09-18_17-28-40
  done: false
  episode_len_mean: 527.74
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 3.66
  episode_reward_min: -3.0
  episodes_this_iter: 9
  episodes_total: 284
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 2.004732468393114
          entropy_coeff: 0.009999999999999998
          kl: 0.009575118247439472
          policy_loss: -0.10403082304530674
          total_loss: 0.16442310098144744
          vf_explained_var: 0.20316599309444427
          vf_loss: 0.28332484771187105
    num_agent_steps_sampled: 214000
    num_agent_steps_trained: 214000
    num_steps_sampled: 214000
    num_steps_trained: 214000
  iterations_since_restore: 214
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,214,4903.35,214000,3.66,6,-3,527.74




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 215000
  custom_metrics: {}
  date: 2021-09-18_17-30-06
  done: false
  episode_len_mean: 516.83
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 3.67
  episode_reward_min: -3.0
  episodes_this_iter: 4
  episodes_total: 288
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.5891621165805392
          entropy_coeff: 0.009999999999999998
          kl: 0.009573723536428493
          policy_loss: -0.0534142404794693
          total_loss: 0.6710453248686261
          vf_explained_var: 0.42722854018211365
          vf_loss: 0.7351755420366923
    num_agent_steps_sampled: 215000
    num_agent_steps_trained: 215000
    num_steps_sampled: 215000
    num_steps_trained: 215000
  iterations_since_restore: 215
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,215,4989.92,215000,3.67,6,-3,516.83




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 216000
  custom_metrics: {}
  date: 2021-09-18_17-32-38
  done: false
  episode_len_mean: 485.19
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 3.73
  episode_reward_min: -3.0
  episodes_this_iter: 8
  episodes_total: 296
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.7568458689583673
          entropy_coeff: 0.009999999999999998
          kl: 0.009470004092806643
          policy_loss: 0.11183282261093458
          total_loss: 0.4269035812881258
          vf_explained_var: 0.22673867642879486
          vf_loss: 0.32751963867081535
    num_agent_steps_sampled: 216000
    num_agent_steps_trained: 216000
    num_steps_sampled: 216000
    num_steps_trained: 216000
  iterations_since_restore: 216
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,216,5141.29,216000,3.73,6,-3,485.19




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 217000
  custom_metrics: {}
  date: 2021-09-18_17-34-04
  done: false
  episode_len_mean: 476.57
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 3.73
  episode_reward_min: -3.0
  episodes_this_iter: 4
  episodes_total: 300
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.599537558025784
          entropy_coeff: 0.009999999999999998
          kl: 0.008066461301945773
          policy_loss: -0.031399761140346524
          total_loss: 0.3779536017941104
          vf_explained_var: 0.5836997628211975
          vf_loss: 0.42098793149408364
    num_agent_steps_sampled: 217000
    num_agent_steps_trained: 217000
    num_steps_sampled: 217000
    num_steps_trained: 217000
  iterations_since_restore: 217
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,217,5227.31,217000,3.73,6,-3,476.57




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 218000
  custom_metrics: {}
  date: 2021-09-18_17-36-25
  done: false
  episode_len_mean: 447.35
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 3.74
  episode_reward_min: -3.0
  episodes_this_iter: 8
  episodes_total: 308
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.889686772558424
          entropy_coeff: 0.009999999999999998
          kl: 0.011468235414933206
          policy_loss: 0.018775958567857742
          total_loss: 0.9072711825370788
          vf_explained_var: 0.542256772518158
          vf_loss: 0.9011922455496258
    num_agent_steps_sampled: 218000
    num_agent_steps_trained: 218000
    num_steps_sampled: 218000
    num_steps_trained: 218000
  iterations_since_restore: 218
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,218,5368.85,218000,3.74,6,-3,447.35


Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 219000
  custom_metrics: {}
  date: 2021-09-18_17-36-37
  done: false
  episode_len_mean: 450.3
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 3.7
  episode_reward_min: -3.0
  episodes_this_iter: 1
  episodes_total: 309
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 1.9913723429044088
          entropy_coeff: 0.009999999999999998
          kl: 0.01132313459614543
          policy_loss: 0.030882071952025095
          total_loss: 0.15351998938454522
          vf_explained_var: 0.16388773918151855
          vf_loss: 0.13643024609320695
    num_agent_steps_sampled: 219000
    num_agent_steps_trained: 219000
    num_steps_sampled: 219000
    num_steps_trained: 219000
  iterations_since_restore: 219
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,219,5380.47,219000,3.7,6,-3,450.3




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 220000
  custom_metrics: {}
  date: 2021-09-18_17-38-30
  done: false
  episode_len_mean: 420.34
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 3.78
  episode_reward_min: -3.0
  episodes_this_iter: 5
  episodes_total: 314
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5406097412109376
          cur_lr: 5.000000000000001e-05
          entropy: 2.028445283571879
          entropy_coeff: 0.009999999999999998
          kl: 0.02290706198909834
          policy_loss: -0.035162400123145844
          total_loss: 0.7311061292886734
          vf_explained_var: 0.3798457384109497
          vf_loss: 0.7741691980097029
    num_agent_steps_sampled: 220000
    num_agent_steps_trained: 220000
    num_steps_sampled: 220000
    num_steps_trained: 220000
  iterations_since_restore: 220
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,220,5493.68,220000,3.78,6,-3,420.34




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 221000
  custom_metrics: {}
  date: 2021-09-18_17-39-38
  done: false
  episode_len_mean: 410.18
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 3.77
  episode_reward_min: -3.0
  episodes_this_iter: 4
  episodes_total: 318
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.9485778609911601
          entropy_coeff: 0.009999999999999998
          kl: 0.0101652905012615
          policy_loss: 0.12355755037731594
          total_loss: 0.4367978495028284
          vf_explained_var: 0.32721036672592163
          vf_loss: 0.32448289969729055
    num_agent_steps_sampled: 221000
    num_agent_steps_trained: 221000
    num_steps_sampled: 221000
    num_steps_trained: 221000
  iterations_since_restore: 221
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,221,5561.59,221000,3.77,6,-3,410.18




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 222000
  custom_metrics: {}
  date: 2021-09-18_17-43-21
  done: false
  episode_len_mean: 368.66
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 3.88
  episode_reward_min: -3.0
  episodes_this_iter: 11
  episodes_total: 329
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.9212625331348843
          entropy_coeff: 0.009999999999999998
          kl: 0.016550959875603885
          policy_loss: 0.04711655076179239
          total_loss: 0.7407066984309091
          vf_explained_var: 0.46857982873916626
          vf_loss: 0.6993813518020842
    num_agent_steps_sampled: 222000
    num_agent_steps_trained: 222000
    num_steps_sampled: 222000
    num_steps_trained: 222000
  iterations_since_restore: 222
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,222,5784.35,222000,3.88,6,-3,368.66




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 223000
  custom_metrics: {}
  date: 2021-09-18_17-46-18
  done: false
  episode_len_mean: 309.8
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 4.18
  episode_reward_min: -3.0
  episodes_this_iter: 9
  episodes_total: 338
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.9074920641051398
          entropy_coeff: 0.009999999999999998
          kl: 0.008588387973760724
          policy_loss: -0.05981851791342099
          total_loss: 0.6974038812849257
          vf_explained_var: 0.5798134803771973
          vf_loss: 0.7693328685230679
    num_agent_steps_sampled: 223000
    num_agent_steps_trained: 223000
    num_steps_sampled: 223000
    num_steps_trained: 223000
  iterations_since_restore: 223
  node_ip: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,223,5961.24,223000,4.18,6,-3,309.8




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 224000
  custom_metrics: {}
  date: 2021-09-18_17-46-47
  done: false
  episode_len_mean: 309.05
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 4.21
  episode_reward_min: -3.0
  episodes_this_iter: 2
  episodes_total: 340
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 2.355715587404039
          entropy_coeff: 0.009999999999999998
          kl: 0.005836548301087064
          policy_loss: -0.01735704607433743
          total_loss: 0.025700867941810025
          vf_explained_var: 0.3180958926677704
          vf_loss: 0.0618821293529537
    num_agent_steps_sampled: 224000
    num_agent_steps_trained: 224000
    num_steps_sampled: 224000
    num_steps_trained: 224000
  iterations_since_restore: 224
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,224,5990.67,224000,4.21,6,-3,309.05




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 225000
  custom_metrics: {}
  date: 2021-09-18_17-51-01
  done: false
  episode_len_mean: 225.26
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 4.61
  episode_reward_min: -3.0
  episodes_this_iter: 13
  episodes_total: 353
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.8499625113275315
          entropy_coeff: 0.009999999999999998
          kl: 0.009661372075146963
          policy_loss: -0.09751475320922004
          total_loss: 0.6662868464986483
          vf_explained_var: 0.5030145645141602
          vf_loss: 0.7744666821426816
    num_agent_steps_sampled: 225000
    num_agent_steps_trained: 225000
    num_steps_sampled: 225000
    num_steps_trained: 225000
  iterations_since_restore: 225
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,225,6243.88,225000,4.61,6,-3,225.26




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 226000
  custom_metrics: {}
  date: 2021-09-18_17-54-22
  done: false
  episode_len_mean: 189.41
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 4.78
  episode_reward_min: -1.0
  episodes_this_iter: 10
  episodes_total: 363
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.8700449758105808
          entropy_coeff: 0.009999999999999998
          kl: 0.009782067041735914
          policy_loss: -0.03558750715520647
          total_loss: 0.649059945013788
          vf_explained_var: 0.6287696957588196
          vf_loss: 0.6954154802693261
    num_agent_steps_sampled: 226000
    num_agent_steps_trained: 226000
    num_steps_sampled: 226000
    num_steps_trained: 226000
  iterations_since_restore: 226
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,226,6445.12,226000,4.78,6,-1,189.41




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 227000
  custom_metrics: {}
  date: 2021-09-18_17-58-39
  done: false
  episode_len_mean: 135.83
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 4.96
  episode_reward_min: 0.0
  episodes_this_iter: 13
  episodes_total: 376
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.8517949355973138
          entropy_coeff: 0.009999999999999998
          kl: 0.007263908439354339
          policy_loss: -0.10228131744596693
          total_loss: 0.7450110935502582
          vf_explained_var: 0.5960050821304321
          vf_loss: 0.8599199480480618
    num_agent_steps_sampled: 227000
    num_agent_steps_trained: 227000
    num_steps_sampled: 227000
    num_steps_trained: 227000
  iterations_since_restore: 227
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,227,6702.44,227000,4.96,6,0,135.83




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 228000
  custom_metrics: {}
  date: 2021-09-18_18-01-40
  done: false
  episode_len_mean: 138.1
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 4.85
  episode_reward_min: 0.0
  episodes_this_iter: 9
  episodes_total: 385
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 1.8913516892327202
          entropy_coeff: 0.009999999999999998
          kl: 0.009724669212258293
          policy_loss: 0.005155025463965204
          total_loss: 0.6634866942962011
          vf_explained_var: 0.5824788212776184
          vf_loss: 0.6693593174219131
    num_agent_steps_sampled: 228000
    num_agent_steps_trained: 228000
    num_steps_sampled: 228000
    num_steps_trained: 228000
  iterations_since_restore: 228
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,228,6883.63,228000,4.85,6,0,138.1




Result for PPO_my_env_6a34a_00000:
  agent_timesteps_total: 229000
  custom_metrics: {}
  date: 2021-09-18_18-02-26
  done: false
  episode_len_mean: 145.27
  episode_media: {}
  episode_reward_max: 6.0
  episode_reward_mean: 4.79
  episode_reward_min: 0.0
  episodes_this_iter: 3
  episodes_total: 388
  experiment_id: 5962629f4db34f84b72a94ad1dbae226
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8109146118164059
          cur_lr: 5.000000000000001e-05
          entropy: 2.078606551223331
          entropy_coeff: 0.009999999999999998
          kl: 0.01039004403833337
          policy_loss: 0.06413943337069618
          total_loss: 0.19062571502808068
          vf_explained_var: 0.3345493674278259
          vf_loss: 0.13884690875808398
    num_agent_steps_sampled: 229000
    num_agent_steps_trained: 229000
    num_steps_sampled: 229000
    num_steps_trained: 229000
  iterations_since_restore: 229
  node_ip: 192

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_6a34a_00000,RUNNING,192.168.3.5:168620,229,6929.53,229000,4.79,6,0,145.27


