In [1]:
import torch 
from torch import nn

import ray
from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

#from models import VisualEncoder
from train import *
from wrappers_2 import *



In [2]:
class VisualEncoder(nn.Module):
    """Convolutional image encoder that flattens its final feature map.

    The network is six stages of ``Conv2d(kernel_size=2, stride=2, padding=0)``
    followed by ``ELU``; each stage halves the spatial size (rounding down).
    Channel widths run 3 -> 32 -> 32 -> 64 -> 128 -> 256 -> 512, and a final
    ``Flatten`` collapses everything after the batch dimension.
    """

    def __init__(self):
        super().__init__()

        channel_plan = (3, 32, 32, 64, 128, 256, 512)
        stages = []
        for in_channels, out_channels in zip(channel_plan[:-1], channel_plan[1:]):
            stages.append(
                nn.Conv2d(in_channels, out_channels, kernel_size=2, stride=2, padding=0)
            )
            stages.append(nn.ELU())
        stages.append(nn.Flatten())

        # A single Sequential called `cnn` keeps the state_dict keys as
        # `cnn.<index>.weight` / `cnn.<index>.bias`.
        self.cnn = nn.Sequential(*stages)

    def forward(self, x):
        """Encode the image batch ``x`` and return one flat vector per sample."""
        return self.cnn(x)

In [3]:
from torch.nn.functional import one_hot

class MyModelClass(TorchModelV2, nn.Module):
    """RLlib Torch model with a pretrained image encoder and a target-grid encoder.

    The image observation (``obs['pov']``) goes through a ``VisualEncoder``
    whose weights are loaded from disk and which is evaluated without gradient
    tracking. The target grid (``obs['target_grid']``) is one-hot encoded into
    7 classes and reduced to a single channel by a 1x1x1 ``Conv3d``. Both
    feature vectors are concatenated and fed to an MLP trunk that is shared by
    the action-logit head and the value head.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        """Build all sub-networks and load the pretrained encoder weights.

        Args:
            obs_space, action_space, num_outputs, model_config, name:
                forwarded unchanged to ``TorchModelV2.__init__``.
                ``action_space.n`` sets the width of the action head.

        Raises:
            Whatever ``torch.load`` / ``load_state_dict`` raise when the
            encoder checkpoint is missing or does not match ``VisualEncoder``.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)

        visual_features_dim = 512         # width the trunk expects from the image encoder
        target_features_dim = 9 * 11 * 11  # flattened single-channel target grid
        policy_hidden_dim = 256

        self.visual_encoder = VisualEncoder()
        # Load on CPU first; the module is moved to the GPU below if one exists.
        self.visual_encoder.load_state_dict(
            torch.load("/IGLU-Minecraft/models/AngelaCNN/encoder_weigths.pth", map_location=torch.device('cpu'))
        )
        # 1x1x1 convolution: mixes the 7 one-hot block channels into one value per cell.
        self.target_encoder = nn.Sequential(
            nn.Conv3d(7, 1, kernel_size=1, stride=1, padding=0),
            nn.ELU(),
        )
        self.policy_network = nn.Sequential(
            nn.Linear(visual_features_dim + target_features_dim, 1024),
            nn.ELU(),
            nn.Linear(1024, 512),
            nn.ELU(),
            nn.Linear(512, policy_hidden_dim),
            nn.ELU(),
            nn.Linear(policy_hidden_dim, policy_hidden_dim),
            nn.ELU(),
        )
        self.action_head = nn.Linear(policy_hidden_dim, action_space.n)
        self.value_head = nn.Linear(policy_hidden_dim, 1)
        # Filled in by forward(); read back by value_function().
        self.last_value = None

        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            self.visual_encoder.cuda()
            self.target_encoder.cuda()
            self.policy_network.cuda()
            self.action_head.cuda()
            self.value_head.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Compute action logits and cache the value estimate for the batch.

        Args:
            input_dict: must contain ``'obs'`` with ``'pov'`` (channel-last
                image batch, scaled here by 1/255) and ``'target_grid'``
                (integer block ids in ``[0, 6]``).
            state: passed through untouched.
            seq_lens: unused.

        Returns:
            ``(action_logits, state)``.
        """
        obs = input_dict['obs']
        # Follow the device the model's weights are actually on, so inputs and
        # weights always agree regardless of who moved the model.
        device = self.action_head.weight.device

        # (batch, H, W, C) -> (batch, C, H, W), rescaled to [0, 1].
        pov = obs['pov'].permute(0, 3, 1, 2).float() / 255.0
        # One-hot adds a trailing class axis; move it to the channel position.
        target = one_hot(obs['target_grid'].long(), num_classes=7).permute(0, 4, 1, 2, 3).float()

        # Tensor.cuda()/Tensor.to() return a new tensor rather than moving the
        # tensor in place, so the result has to be rebound.
        pov = pov.to(device)
        target = target.to(device)

        # No gradient is tracked through the pretrained encoder.
        with torch.no_grad():
            visual_features = self.visual_encoder(pov)

        target_features = self.target_encoder(target)
        target_features = target_features.reshape(target_features.shape[0], -1)
        features = torch.cat([visual_features, target_features], dim=1)
        features = self.policy_network(features)
        action = self.action_head(features)
        # Drop the trailing size-1 dimension so the value is one scalar per sample.
        self.last_value = self.value_head(features).squeeze(1)
        return action, state

    @override(TorchModelV2)
    def value_function(self):
        """Return the value estimates cached by the most recent ``forward()`` call.

        Raises:
            AssertionError: if ``forward()`` has not been called yet.
        """
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

In [4]:
# Stand-alone copy of the policy MLP trunk, built only to count its parameters.
visual_features_dim = 512
target_features_dim = 9 * 11 * 11
policy_hidden_dim = 256

# Each (in, out) pair expands to a Linear layer followed by an ELU.
policy_network = nn.Sequential(*(
    module
    for in_dim, out_dim in (
        (visual_features_dim + target_features_dim, 1024),
        (1024, 512),
        (512, policy_hidden_dim),
        (policy_hidden_dim, policy_hidden_dim),
    )
    for module in (nn.Linear(in_dim, out_dim), nn.ELU())
))

# Total number of scalar parameters; left as the last expression so the cell displays it.
sum(map(torch.numel, policy_network.parameters()))

2362368

In [5]:
# Register MyModelClass under the name that the trainer config's
# "custom_model" entry refers to ("my_torch_model").
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [6]:
class VisualObservationWrapper(ObsWrapper):
    """Observation wrapper exposing the image, inventory, compass and (optionally) target grid.

    Args:
        env: environment to wrap.
        include_target: when true, ``'target_grid'`` is declared in the
            observation space and included in every observation; when false
            it is left out of both, so observations always match the
            declared space.
    """

    def __init__(self, env, include_target=False):
        super().__init__(env)
        # Remembered so observation() emits exactly the keys declared below.
        self.include_target = include_target
        spaces = {
            'pov': gym.spaces.Box(low=0, high=255, shape=(64, 64, 3)),
            'inventory': gym.spaces.Box(low=0.0, high=20.0, shape=(6,)),
            'compass': gym.spaces.Box(low=-180.0, high=180.0, shape=(1,))
        }
        if include_target:
            spaces['target_grid'] = \
                gym.spaces.Box(low=0, high=6, shape=(9, 11, 11))
        self.observation_space = gym.spaces.Dict(spaces)

    def observation(self, obs, reward=None, done=None, info=None):
        """Convert a raw observation into the dict described by ``observation_space``.

        Args:
            obs: raw observation; ``'pov'``, ``'inventory'`` and
                ``obs['compass']['angle']`` are read.
            reward, done: accepted but not used here.
            info: optional step info. If it carries ``'target_grid'`` the
                entry is taken from it and removed from ``info``.

        Returns:
            dict with ``'pov'`` (cast to float32), ``'inventory'``,
            ``'compass'`` (1-element array) and, only when the wrapper was
            built with ``include_target=True``, ``'target_grid'``.
        """
        if info is not None:
            if 'target_grid' in info:
                target_grid = info['target_grid']
                # The grid is consumed here and removed from the info dict.
                del info['target_grid']
            else:
                # A step info without the grid is unexpected: log it, request
                # a reset if the env supports that, and fall back to the task.
                logger.error(f'info: {info}')
                if hasattr(self.unwrapped, 'should_reset'):
                    self.unwrapped.should_reset(True)
                target_grid = self.env.unwrapped.tasks.current.target_grid
        else:
            # No info supplied, so read the grid from the current task.
            target_grid = self.env.unwrapped.tasks.current.target_grid

        result = {
            'pov': obs['pov'].astype(np.float32),
            'inventory': obs['inventory'],
            'compass': np.array([obs['compass']['angle'].item()]),
        }
        # Only emit the key when it is part of the declared observation space.
        if self.include_target:
            result['target_grid'] = target_grid
        return result

In [7]:
import os
# Select GPU 0, with devices ordered by PCI bus id (see issue #152).
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
})

class RewardWrapper(gym.RewardWrapper):
    """Reward shaping: penalise zero-reward steps and shrink unit rewards.

    Construction is inherited unchanged (``RewardWrapper(env)``).
    """

    def reward(self, rew):
        """Return the shaped reward.

        * ``0``  becomes ``-0.01``;
        * ``+1`` / ``-1`` are divided by 10;
        * every other value is returned as is.
        """
        if rew == 0:
            # -0.01 can never hit the unit-reward branch, so return right away.
            return -0.01
        if abs(rew) == 1:
            rew /= 10
        return rew
    
def env_creator(env_config):
    """Build the wrapped IGLU environment used for training.

    Args:
        env_config: accepted for the env-creator signature; not read here.

    Returns:
        The 'IGLUSilentBuilder-v0' environment (max 250 steps, restricted to
        a fixed set of nine tasks) wrapped, innermost first, by
        VisualObservationWrapper, SelectAndPlace, Discretization and
        RewardWrapper.
    """
    task_ids = [
        'C3', 'C17', 'C20',
        'C22', 'C32', 'C40',
        'C85', 'C87', 'C93',
    ]

    base_env = gym.make('IGLUSilentBuilder-v0', max_steps=250)
    base_env.update_taskset(TaskSet(preset=task_ids))

    with_observations = VisualObservationWrapper(base_env, include_target=True)
    with_placement = SelectAndPlace(with_observations)
    discretized = Discretization(with_placement, flat_action_space('human-level'))
    return RewardWrapper(discretized)

from ray.tune.registry import register_env
# Expose env_creator under the id "my_env", which the trainer config passes as "env".
register_env("my_env", env_creator)

from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

In [7]:
from ray.tune.integration.wandb import WandbLogger

# Use the custom model registered as "my_torch_model"; no extra constructor kwargs.
model_settings = {
    "custom_model": "my_torch_model",
    "custom_model_config": {},
}

# Weights & Biases project and run name picked up by WandbLogger.
wandb_settings = {
    "project": "IGLU-Minecraft",
    "name": "PPO MultiTask <=10 pretrained (AngelaCNN) (3 noops after placement) r: -0.01 div10",
}

# PPO settings; "gamma" is deliberately not set here.
ppo_config = {
    "env": "my_env",
    "framework": "torch",
    "num_gpus": 1,
    "num_workers": 3,
    "sgd_minibatch_size": 256,
    "clip_param": 0.2,
    "entropy_coeff": 0.01,
    "lambda": 0.95,
    "train_batch_size": 5_000,
    "lr": 1e-4,
    "model": model_settings,
    "logger_config": {"wandb": wandb_settings},
}

# Launch training: checkpoint every 5 iterations and at the end, keeping 50 checkpoints.
analysis = tune.run(
    PPOTrainer,
    config=ppo_config,
    loggers=[WandbLogger],
    local_dir="/IGLU-Minecraft/checkpoints/10_blocks_max",
    keep_checkpoints_num=50,
    checkpoint_freq=5,
    checkpoint_at_end=True,
)

2021-11-06 22:05:49,744	INFO wandb.py:170 -- Already logged into W&B.
2021-11-06 22:05:49,759	ERROR syncer.py:72 -- Log sync requires rsync to be installed.


Trial name,status,loc
PPO_my_env_b3578_00000,RUNNING,


[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.6 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2m[36m(pid=492355)[0m 2021-11-06 22:05:53,151	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=492355)[0m 2021-11-06 22:05:53,151	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 9996
  custom_metrics: {}
  date: 2021-11-06_22-09-16
  done: false
  episode_len_mean: 100.96938775510205
  episode_media: {}
  episode_reward_max: 3.020000000000007
  episode_reward_mean: -0.8130612244897962
  episode_reward_min: -1.2400000000000009
  episodes_this_iter: 98
  episodes_total: 98
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999998
          cur_lr: 0.00010000000000000002
          entropy: 2.882957436080672
          entropy_coeff: 0.01
          kl: 0.0067987647023400955
          policy_loss: -0.014011172058745327
          total_loss: -0.003066996443602774
          vf_explained_var: -0.24038957059383392
          vf_loss: 0.03841399739100399
    num_agent_steps_sampled: 9996
    num_agent_steps_trained: 9996
    num_steps_sampled: 9996
    num_steps_trained: 9996
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,1,197.587,9996,-0.813061,3.02,-1.24,100.969


Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 19992
  custom_metrics: {}
  date: 2021-11-06_22-11-11
  done: false
  episode_len_mean: 98.89108910891089
  episode_media: {}
  episode_reward_max: 2.730000000000001
  episode_reward_mean: -0.7006930693069311
  episode_reward_min: -1.5600000000000007
  episodes_this_iter: 101
  episodes_total: 199
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999998
          cur_lr: 0.00010000000000000002
          entropy: 2.8704273527504034
          entropy_coeff: 0.01
          kl: 0.008737530106856474
          policy_loss: -0.01511833194611419
          total_loss: 0.023536202814589197
          vf_explained_var: -0.021123560145497322
          vf_loss: 0.06561130183991681
    num_agent_steps_sampled: 19992
    num_agent_steps_trained: 19992
    num_steps_sampled: 19992
    num_steps_trained: 19992


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,2,312.206,19992,-0.700693,2.73,-1.56,98.8911


Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 29988
  custom_metrics: {}
  date: 2021-11-06_22-13-07
  done: false
  episode_len_mean: 97.6116504854369
  episode_media: {}
  episode_reward_max: 2.9000000000000017
  episode_reward_mean: -0.36242718446601996
  episode_reward_min: -1.780000000000001
  episodes_this_iter: 103
  episodes_total: 302
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999998
          cur_lr: 0.00010000000000000002
          entropy: 2.8429944474472957
          entropy_coeff: 0.01
          kl: 0.010043211217889005
          policy_loss: -0.02009713626347291
          total_loss: 0.11554151282001
          vf_explained_var: -0.02040037140250206
          vf_loss: 0.16205995082775623
    num_agent_steps_sampled: 29988
    num_agent_steps_trained: 29988
    num_steps_sampled: 29988
    num_steps_trained: 29988
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,3,427.81,29988,-0.362427,2.9,-1.78,97.6117




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 39984
  custom_metrics: {}
  date: 2021-11-06_22-15-45
  done: false
  episode_len_mean: 96.11764705882354
  episode_media: {}
  episode_reward_max: 5.030000000000001
  episode_reward_mean: 0.6533333333333339
  episode_reward_min: -1.7000000000000008
  episodes_this_iter: 102
  episodes_total: 404
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999998
          cur_lr: 0.00010000000000000002
          entropy: 2.8128473255369397
          entropy_coeff: 0.01
          kl: 0.01222613251808544
          policy_loss: -0.022945866004651427
          total_loss: 0.28771926366811634
          vf_explained_var: 0.29705411195755005
          vf_loss: 0.3363483756812464
    num_agent_steps_sampled: 39984
    num_agent_steps_trained: 39984
    num_steps_sampled: 39984
    num_steps_trained: 39984
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,4,586.727,39984,0.653333,5.03,-1.7,96.1176


Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 49980
  custom_metrics: {}
  date: 2021-11-06_22-17-50
  done: false
  episode_len_mean: 100.2
  episode_media: {}
  episode_reward_max: 4.760000000000006
  episode_reward_mean: 0.8541000000000016
  episode_reward_min: -1.6700000000000008
  episodes_this_iter: 100
  episodes_total: 504
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999998
          cur_lr: 0.00010000000000000002
          entropy: 2.7911398015470588
          entropy_coeff: 0.01
          kl: 0.013859893252548699
          policy_loss: -0.02394891201208035
          total_loss: 0.3152843459174992
          vf_explained_var: 0.3669101297855377
          vf_loss: 0.36437267779539795
    num_agent_steps_sampled: 49980
    num_agent_steps_trained: 49980
    num_steps_sampled: 49980
    num_steps_trained: 49980
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,5,710.844,49980,0.8541,4.76,-1.67,100.2




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 59976
  custom_metrics: {}
  date: 2021-11-06_22-20-07
  done: false
  episode_len_mean: 101.82
  episode_media: {}
  episode_reward_max: 4.930000000000001
  episode_reward_mean: 0.9985000000000016
  episode_reward_min: -1.6500000000000008
  episodes_this_iter: 99
  episodes_total: 603
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999998
          cur_lr: 0.00010000000000000002
          entropy: 2.7747156992936746
          entropy_coeff: 0.01
          kl: 0.017946328197573493
          policy_loss: -0.026925969722433985
          total_loss: 0.3421958462916251
          vf_explained_var: 0.4086821675300598
          vf_loss: 0.3932797075273135
    num_agent_steps_sampled: 59976
    num_agent_steps_trained: 59976
    num_steps_sampled: 59976
    num_steps_trained: 59976
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,6,848.346,59976,0.9985,4.93,-1.65,101.82




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 69972
  custom_metrics: {}
  date: 2021-11-06_22-22-25
  done: false
  episode_len_mean: 102.84
  episode_media: {}
  episode_reward_max: 5.1800000000000015
  episode_reward_mean: 1.402000000000003
  episode_reward_min: -1.7700000000000011
  episodes_this_iter: 97
  episodes_total: 700
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999998
          cur_lr: 0.00010000000000000002
          entropy: 2.749470445029756
          entropy_coeff: 0.01
          kl: 0.01701558884952315
          policy_loss: -0.024770759908148112
          total_loss: 0.3153886606741665
          vf_explained_var: 0.46201270818710327
          vf_loss: 0.3642510064583049
    num_agent_steps_sampled: 69972
    num_agent_steps_trained: 69972
    num_steps_sampled: 69972
    num_steps_trained: 69972
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,7,986.401,69972,1.402,5.18,-1.77,102.84




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 79968
  custom_metrics: {}
  date: 2021-11-06_22-25-04
  done: false
  episode_len_mean: 100.5
  episode_media: {}
  episode_reward_max: 5.040000000000001
  episode_reward_mean: 1.0834000000000026
  episode_reward_min: -1.950000000000001
  episodes_this_iter: 100
  episodes_total: 800
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999998
          cur_lr: 0.00010000000000000002
          entropy: 2.737551778809637
          entropy_coeff: 0.01
          kl: 0.015216405026077285
          policy_loss: -0.030675791710233078
          total_loss: 0.27992696514050674
          vf_explained_var: 0.4941944479942322
          vf_loss: 0.33493499320923775
    num_agent_steps_sampled: 79968
    num_agent_steps_trained: 79968
    num_steps_sampled: 79968
    num_steps_trained: 79968
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,8,1144.89,79968,1.0834,5.04,-1.95,100.5




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 89964
  custom_metrics: {}
  date: 2021-11-06_22-27-45
  done: false
  episode_len_mean: 102.8
  episode_media: {}
  episode_reward_max: 5.69
  episode_reward_mean: 1.588600000000004
  episode_reward_min: -1.9500000000000008
  episodes_this_iter: 97
  episodes_total: 897
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999998
          cur_lr: 0.00010000000000000002
          entropy: 2.7144894275909817
          entropy_coeff: 0.01
          kl: 0.01794989639724444
          policy_loss: -0.028097341814611713
          total_loss: 0.3276387840222854
          vf_explained_var: 0.6041265726089478
          vf_loss: 0.3792910394505558
    num_agent_steps_sampled: 89964
    num_agent_steps_trained: 89964
    num_steps_sampled: 89964
    num_steps_trained: 89964
  iterations_since_restore: 9
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,9,1306.37,89964,1.5886,5.69,-1.95,102.8




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 99960
  custom_metrics: {}
  date: 2021-11-06_22-31-15
  done: false
  episode_len_mean: 100.2
  episode_media: {}
  episode_reward_max: 5.720000000000001
  episode_reward_mean: 1.8725000000000054
  episode_reward_min: -2.1999999999999997
  episodes_this_iter: 99
  episodes_total: 996
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999998
          cur_lr: 0.00010000000000000002
          entropy: 2.679502201284099
          entropy_coeff: 0.01
          kl: 0.01843892547095932
          policy_loss: -0.030897222126587333
          total_loss: 0.33880648418910736
          vf_explained_var: 0.6360749006271362
          vf_loss: 0.3928109419778881
    num_agent_steps_sampled: 99960
    num_agent_steps_trained: 99960
    num_steps_sampled: 99960
    num_steps_trained: 99960
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,10,1516.57,99960,1.8725,5.72,-2.2,100.2




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 109956
  custom_metrics: {}
  date: 2021-11-06_22-34-35
  done: false
  episode_len_mean: 97.49514563106796
  episode_media: {}
  episode_reward_max: 9.430000000000007
  episode_reward_mean: 2.019029126213598
  episode_reward_min: -1.8200000000000007
  episodes_this_iter: 103
  episodes_total: 1099
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999998
          cur_lr: 0.00010000000000000002
          entropy: 2.6543958763790947
          entropy_coeff: 0.01
          kl: 0.019016498642828387
          policy_loss: -0.029504858704013193
          total_loss: 0.3336116527533557
          vf_explained_var: 0.6999174356460571
          vf_loss: 0.38585717069287584
    num_agent_steps_sampled: 109956
    num_agent_steps_trained: 109956
    num_steps_sampled: 109956
    num_steps_trained: 109956


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,11,1716.09,109956,2.01903,9.43,-1.82,97.4951




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 119952
  custom_metrics: {}
  date: 2021-11-06_22-38-36
  done: false
  episode_len_mean: 96.75728155339806
  episode_media: {}
  episode_reward_max: 6.510000000000012
  episode_reward_mean: 2.352621359223307
  episode_reward_min: -1.600000000000001
  episodes_this_iter: 103
  episodes_total: 1202
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999998
          cur_lr: 0.00010000000000000002
          entropy: 2.626043531629774
          entropy_coeff: 0.01
          kl: 0.020383396375117574
          policy_loss: -0.035758528064013036
          total_loss: 0.29308553997220266
          vf_explained_var: 0.7461588382720947
          vf_loss: 0.3510278247869932
    num_agent_steps_sampled: 119952
    num_agent_steps_trained: 119952
    num_steps_sampled: 119952
    num_steps_trained: 119952
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,12,1957.47,119952,2.35262,6.51,-1.6,96.7573




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 129948
  custom_metrics: {}
  date: 2021-11-06_22-43-02
  done: false
  episode_len_mean: 90.9090909090909
  episode_media: {}
  episode_reward_max: 8.95000000000001
  episode_reward_mean: 2.8700000000000068
  episode_reward_min: -1.5800000000000005
  episodes_this_iter: 110
  episodes_total: 1312
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 0.00010000000000000002
          entropy: 2.57596069401146
          entropy_coeff: 0.01
          kl: 0.02080014275892654
          policy_loss: -0.03537120167325195
          total_loss: 0.31262989497114707
          vf_explained_var: 0.7252334952354431
          vf_loss: 0.36752065790450983
    num_agent_steps_sampled: 129948
    num_agent_steps_trained: 129948
    num_steps_sampled: 129948
    num_steps_trained: 129948
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,13,2223.08,129948,2.87,8.95,-1.58,90.9091




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 139944
  custom_metrics: {}
  date: 2021-11-06_22-48-24
  done: false
  episode_len_mean: 92.05504587155963
  episode_media: {}
  episode_reward_max: 7.130000000000016
  episode_reward_mean: 3.1777064220183564
  episode_reward_min: 0.159999999999999
  episodes_this_iter: 109
  episodes_total: 1421
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 0.00010000000000000002
          entropy: 2.551375644431155
          entropy_coeff: 0.01
          kl: 0.019077878332017177
          policy_loss: -0.03712439205593024
          total_loss: 0.3298228888780388
          vf_explained_var: 0.7895557284355164
          vf_loss: 0.3838759921172745
    num_agent_steps_sampled: 139944
    num_agent_steps_trained: 139944
    num_steps_sampled: 139944
    num_steps_trained: 139944
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,14,2545.25,139944,3.17771,7.13,0.16,92.055




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 149940
  custom_metrics: {}
  date: 2021-11-06_22-54-50
  done: false
  episode_len_mean: 84.00840336134453
  episode_media: {}
  episode_reward_max: 9.82
  episode_reward_mean: 3.023949579831939
  episode_reward_min: -1.4700000000000009
  episodes_this_iter: 119
  episodes_total: 1540
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 0.00010000000000000002
          entropy: 2.5483338394735613
          entropy_coeff: 0.01
          kl: 0.017781711699984052
          policy_loss: -0.035487792405307805
          total_loss: 0.3097625810517651
          vf_explained_var: 0.8187939524650574
          vf_loss: 0.3627319406845376
    num_agent_steps_sampled: 149940
    num_agent_steps_trained: 149940
    num_steps_sampled: 149940
    num_steps_trained: 149940
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,15,2931.02,149940,3.02395,9.82,-1.47,84.0084




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 159936
  custom_metrics: {}
  date: 2021-11-06_22-58-34
  done: false
  episode_len_mean: 89.45045045045045
  episode_media: {}
  episode_reward_max: 7.160000000000011
  episode_reward_mean: 3.203783783783791
  episode_reward_min: 0.0899999999999996
  episodes_this_iter: 111
  episodes_total: 1651
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 0.00010000000000000002
          entropy: 2.5235114162803716
          entropy_coeff: 0.01
          kl: 0.01848577281741722
          policy_loss: -0.03941809037604775
          total_loss: 0.26491224150467885
          vf_explained_var: 0.839900553226471
          vf_loss: 0.32124684788605085
    num_agent_steps_sampled: 159936
    num_agent_steps_trained: 159936
    num_steps_sampled: 159936
    num_steps_trained: 159936
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,16,3155.15,159936,3.20378,7.16,0.09,89.4505




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 169932
  custom_metrics: {}
  date: 2021-11-06_23-03-54
  done: false
  episode_len_mean: 84.13333333333334
  episode_media: {}
  episode_reward_max: 9.680000000000001
  episode_reward_mean: 3.410083333333341
  episode_reward_min: -1.630000000000001
  episodes_this_iter: 120
  episodes_total: 1771
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000001
          cur_lr: 0.00010000000000000002
          entropy: 2.4809008734857936
          entropy_coeff: 0.01
          kl: 0.020637559266016972
          policy_loss: -0.0401372610599312
          total_loss: 0.29531532770548113
          vf_explained_var: 0.8618206977844238
          vf_loss: 0.3509746960658803
    num_agent_steps_sampled: 169932
    num_agent_steps_trained: 169932
    num_steps_sampled: 169932
    num_steps_trained: 169932
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,17,3475.13,169932,3.41008,9.68,-1.63,84.1333




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 179928
  custom_metrics: {}
  date: 2021-11-06_23-10-31
  done: false
  episode_len_mean: 78.078125
  episode_media: {}
  episode_reward_max: 9.430000000000007
  episode_reward_mean: 3.6878125000000064
  episode_reward_min: -1.5200000000000005
  episodes_this_iter: 128
  episodes_total: 1899
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999999
          cur_lr: 0.00010000000000000002
          entropy: 2.4428030230041244
          entropy_coeff: 0.01
          kl: 0.01808975568574261
          policy_loss: -0.03721424026303312
          total_loss: 0.29554537732918296
          vf_explained_var: 0.8480872511863708
          vf_loss: 0.3449770635455592
    num_agent_steps_sampled: 179928
    num_agent_steps_trained: 179928
    num_steps_sampled: 179928
    num_steps_trained: 179928
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,18,3871.42,179928,3.68781,9.43,-1.52,78.0781




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 189924
  custom_metrics: {}
  date: 2021-11-06_23-17-14
  done: false
  episode_len_mean: 79.624
  episode_media: {}
  episode_reward_max: 10.870000000000013
  episode_reward_mean: 3.826480000000007
  episode_reward_min: -1.1200000000000008
  episodes_this_iter: 125
  episodes_total: 2024
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999999
          cur_lr: 0.00010000000000000002
          entropy: 2.418796075307406
          entropy_coeff: 0.01
          kl: 0.018642236216753386
          policy_loss: -0.040468431994892085
          total_loss: 0.281378404980796
          vf_explained_var: 0.8543462157249451
          vf_loss: 0.33345128616206665
    num_agent_steps_sampled: 189924
    num_agent_steps_trained: 189924
    num_steps_sampled: 189924
    num_steps_trained: 189924
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,19,4274.5,189924,3.82648,10.87,-1.12,79.624




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 199920
  custom_metrics: {}
  date: 2021-11-06_23-22-00
  done: false
  episode_len_mean: 85.17796610169492
  episode_media: {}
  episode_reward_max: 8.73000000000001
  episode_reward_mean: 3.441101694915263
  episode_reward_min: 0.16999999999999915
  episodes_this_iter: 118
  episodes_total: 2142
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999999
          cur_lr: 0.00010000000000000002
          entropy: 2.425335910381415
          entropy_coeff: 0.01
          kl: 0.018698607429357954
          policy_loss: -0.042993195028577604
          total_loss: 0.2942822369070262
          vf_explained_var: 0.8412535190582275
          vf_loss: 0.3489072308007978
    num_agent_steps_sampled: 199920
    num_agent_steps_trained: 199920
    num_steps_sampled: 199920
    num_steps_trained: 199920
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,20,4560.36,199920,3.4411,8.73,0.17,85.178




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 209916
  custom_metrics: {}
  date: 2021-11-06_23-29-24
  done: false
  episode_len_mean: 78.40625
  episode_media: {}
  episode_reward_max: 9.89
  episode_reward_mean: 3.4210156250000066
  episode_reward_min: -1.1900000000000008
  episodes_this_iter: 128
  episodes_total: 2270
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999999
          cur_lr: 0.00010000000000000002
          entropy: 2.4237866195858033
          entropy_coeff: 0.01
          kl: 0.017780377532087634
          policy_loss: -0.046512353108224706
          total_loss: 0.23710899967381843
          vf_explained_var: 0.8547632694244385
          vf_loss: 0.29585746350324055
    num_agent_steps_sampled: 209916
    num_agent_steps_trained: 209916
    num_steps_sampled: 209916
    num_steps_trained: 209916
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,21,5004.68,209916,3.42102,9.89,-1.19,78.4062




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 219912
  custom_metrics: {}
  date: 2021-11-06_23-34-52
  done: false
  episode_len_mean: 86.72173913043478
  episode_media: {}
  episode_reward_max: 9.730000000000004
  episode_reward_mean: 3.7476521739130524
  episode_reward_min: -1.569999999999995
  episodes_this_iter: 115
  episodes_total: 2385
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999999
          cur_lr: 0.00010000000000000002
          entropy: 2.4194643079725084
          entropy_coeff: 0.01
          kl: 0.02015789779634748
          policy_loss: -0.04479248279498683
          total_loss: 0.27843376113754564
          vf_explained_var: 0.8601797819137573
          vf_loss: 0.33381430635976994
    num_agent_steps_sampled: 219912
    num_agent_steps_trained: 219912
    num_steps_sampled: 219912
    num_steps_trained: 219912
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,22,5333.05,219912,3.74765,9.73,-1.57,86.7217




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 229908
  custom_metrics: {}
  date: 2021-11-06_23-41-29
  done: false
  episode_len_mean: 83.84615384615384
  episode_media: {}
  episode_reward_max: 10.440000000000015
  episode_reward_mean: 3.996495726495735
  episode_reward_min: -0.07000000000000067
  episodes_this_iter: 117
  episodes_total: 2502
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.41907168820373
          entropy_coeff: 0.01
          kl: 0.01666302551976726
          policy_loss: -0.04908370156493834
          total_loss: 0.2599359986077771
          vf_explained_var: 0.850044846534729
          vf_loss: 0.3163391024638445
    num_agent_steps_sampled: 229908
    num_agent_steps_trained: 229908
    num_steps_sampled: 229908
    num_steps_trained: 229908
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,23,5729.91,229908,3.9965,10.44,-0.07,83.8462




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 239904
  custom_metrics: {}
  date: 2021-11-06_23-48-03
  done: false
  episode_len_mean: 84.81666666666666
  episode_media: {}
  episode_reward_max: 9.89
  episode_reward_mean: 3.811666666666676
  episode_reward_min: -1.860000000000001
  episodes_this_iter: 120
  episodes_total: 2622
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.396184074776804
          entropy_coeff: 0.01
          kl: 0.01651395082867579
          policy_loss: -0.044860115060264355
          total_loss: 0.24147657083236
          vf_explained_var: 0.8714379668235779
          vf_loss: 0.29357815130462506
    num_agent_steps_sampled: 239904
    num_agent_steps_trained: 239904
    num_steps_sampled: 239904
    num_steps_trained: 239904
  iterations_since_restore: 24


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,24,6124.05,239904,3.81167,9.89,-1.86,84.8167




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 249900
  custom_metrics: {}
  date: 2021-11-06_23-54-17
  done: false
  episode_len_mean: 81.80327868852459
  episode_media: {}
  episode_reward_max: 10.890000000000013
  episode_reward_mean: 4.035163934426238
  episode_reward_min: 0.019999999999999053
  episodes_this_iter: 122
  episodes_total: 2744
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.3832997169250096
          entropy_coeff: 0.01
          kl: 0.017087917827843548
          policy_loss: -0.046394988188408634
          total_loss: 0.25245897526709504
          vf_explained_var: 0.878831148147583
          vf_loss: 0.3053854423392023
    num_agent_steps_sampled: 249900
    num_agent_steps_trained: 249900
    num_steps_sampled: 249900
    num_steps_trained: 249900
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,25,6497.45,249900,4.03516,10.89,0.02,81.8033




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 259896
  custom_metrics: {}
  date: 2021-11-07_00-01-21
  done: false
  episode_len_mean: 83.4201680672269
  episode_media: {}
  episode_reward_max: 9.89
  episode_reward_mean: 3.9489075630252186
  episode_reward_min: -0.18000000000000083
  episodes_this_iter: 119
  episodes_total: 2863
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.365568749517457
          entropy_coeff: 0.01
          kl: 0.01661658791001848
          policy_loss: -0.04441583103731147
          total_loss: 0.25259184654738404
          vf_explained_var: 0.8834826350212097
          vf_loss: 0.30383906855415077
    num_agent_steps_sampled: 259896
    num_agent_steps_trained: 259896
    num_steps_sampled: 259896
    num_steps_trained: 259896
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,26,6921.58,259896,3.94891,9.89,-0.18,83.4202




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 269892
  custom_metrics: {}
  date: 2021-11-07_00-06-54
  done: false
  episode_len_mean: 86.18103448275862
  episode_media: {}
  episode_reward_max: 9.88
  episode_reward_mean: 3.9601724137931122
  episode_reward_min: -1.6100000000000008
  episodes_this_iter: 116
  episodes_total: 2979
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.3712122273241354
          entropy_coeff: 0.01
          kl: 0.015999326070301686
          policy_loss: -0.04892850153339215
          total_loss: 0.18668069263171946
          vf_explained_var: 0.8929296135902405
          vf_loss: 0.24312199890040434
    num_agent_steps_sampled: 269892
    num_agent_steps_trained: 269892
    num_steps_sampled: 269892
    num_steps_trained: 269892
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,27,7254.78,269892,3.96017,9.88,-1.61,86.181




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 279888
  custom_metrics: {}
  date: 2021-11-07_00-13-37
  done: false
  episode_len_mean: 78.1015625
  episode_media: {}
  episode_reward_max: 9.89
  episode_reward_mean: 4.3266406250000085
  episode_reward_min: 0.21999999999999958
  episodes_this_iter: 128
  episodes_total: 3107
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.3172374552131716
          entropy_coeff: 0.01
          kl: 0.017695175014674703
          policy_loss: -0.05151415873541791
          total_loss: 0.22923505681797735
          vf_explained_var: 0.8902043104171753
          vf_loss: 0.28600522385448474
    num_agent_steps_sampled: 279888
    num_agent_steps_trained: 279888
    num_steps_sampled: 279888
    num_steps_trained: 279888
  iterations_since_restore: 28
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,28,7657.07,279888,4.32664,9.89,0.22,78.1016




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 289884
  custom_metrics: {}
  date: 2021-11-07_00-22-26
  done: false
  episode_len_mean: 75.93181818181819
  episode_media: {}
  episode_reward_max: 10.300000000000017
  episode_reward_mean: 4.492954545454554
  episode_reward_min: 0.10000000000000009
  episodes_this_iter: 132
  episodes_total: 3239
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.308614988714202
          entropy_coeff: 0.01
          kl: 0.016512230574337317
          policy_loss: -0.051402418480979074
          total_loss: 0.1891327839694981
          vf_explained_var: 0.9079654812812805
          vf_loss: 0.24690271742705605
    num_agent_steps_sampled: 289884
    num_agent_steps_trained: 289884
    num_steps_sampled: 289884
    num_steps_trained: 289884
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,29,8186.01,289884,4.49295,10.3,0.1,75.9318




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 299880
  custom_metrics: {}
  date: 2021-11-07_00-29-08
  done: false
  episode_len_mean: 82.15573770491804
  episode_media: {}
  episode_reward_max: 10.370000000000019
  episode_reward_mean: 4.5969672131147625
  episode_reward_min: 0.11999999999999834
  episodes_this_iter: 122
  episodes_total: 3361
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.3144637847558047
          entropy_coeff: 0.01
          kl: 0.017945152640642094
          policy_loss: -0.046951614374406314
          total_loss: 0.23649376154614565
          vf_explained_var: 0.9028477072715759
          vf_loss: 0.28842054448193977
    num_agent_steps_sampled: 299880
    num_agent_steps_trained: 299880
    num_steps_sampled: 299880
    num_steps_trained: 299880
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,30,8588.82,299880,4.59697,10.37,0.12,82.1557




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 309876
  custom_metrics: {}
  date: 2021-11-07_00-36-00
  done: false
  episode_len_mean: 85.00847457627118
  episode_media: {}
  episode_reward_max: 10.040000000000022
  episode_reward_mean: 4.342711864406791
  episode_reward_min: -0.17000000000000112
  episodes_this_iter: 118
  episodes_total: 3479
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.3218603213628133
          entropy_coeff: 0.01
          kl: 0.01589041217897515
          policy_loss: -0.05559979406161568
          total_loss: 0.17534259206527827
          vf_explained_var: 0.9093758463859558
          vf_loss: 0.23807194797465434
    num_agent_steps_sampled: 309876
    num_agent_steps_trained: 309876
    num_steps_sampled: 309876
    num_steps_trained: 309876
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,31,9000.47,309876,4.34271,10.04,-0.17,85.0085




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 319872
  custom_metrics: {}
  date: 2021-11-07_00-42-09
  done: false
  episode_len_mean: 83.36974789915966
  episode_media: {}
  episode_reward_max: 10.780000000000015
  episode_reward_mean: 4.928235294117658
  episode_reward_min: 0.16999999999999926
  episodes_this_iter: 119
  episodes_total: 3598
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.2806643241491074
          entropy_coeff: 0.01
          kl: 0.0173362067063078
          policy_loss: -0.05652891714285072
          total_loss: 0.209269319065552
          vf_explained_var: 0.9056582450866699
          vf_loss: 0.2710519694428668
    num_agent_steps_sampled: 319872
    num_agent_steps_trained: 319872
    num_steps_sampled: 319872
    num_steps_trained: 319872
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,32,9369.5,319872,4.92824,10.78,0.17,83.3697




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 329868
  custom_metrics: {}
  date: 2021-11-07_00-49-29
  done: false
  episode_len_mean: 78.0625
  episode_media: {}
  episode_reward_max: 10.46000000000002
  episode_reward_mean: 4.827734375000009
  episode_reward_min: 0.30000000000000904
  episodes_this_iter: 128
  episodes_total: 3726
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.2742595540152655
          entropy_coeff: 0.01
          kl: 0.017808138584914677
          policy_loss: -0.053843111287738776
          total_loss: 0.2533282441803469
          vf_explained_var: 0.8950046896934509
          vf_loss: 0.31188320906307454
    num_agent_steps_sampled: 329868
    num_agent_steps_trained: 329868
    num_steps_sampled: 329868
    num_steps_trained: 329868
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,33,9809.18,329868,4.82773,10.46,0.3,78.0625




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 339864
  custom_metrics: {}
  date: 2021-11-07_00-57-39
  done: false
  episode_len_mean: 78.11627906976744
  episode_media: {}
  episode_reward_max: 10.320000000000023
  episode_reward_mean: 4.780232558139544
  episode_reward_min: 0.26999999999999913
  episodes_this_iter: 129
  episodes_total: 3855
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.2763678843139585
          entropy_coeff: 0.01
          kl: 0.016563441101781533
          policy_loss: -0.05722061125322794
          total_loss: 0.1898431322330402
          vf_explained_var: 0.9277887344360352
          vf_loss: 0.2530569352591649
    num_agent_steps_sampled: 339864
    num_agent_steps_trained: 339864
    num_steps_sampled: 339864
    num_steps_trained: 339864
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,34,10299.3,339864,4.78023,10.32,0.27,78.1163




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 349860
  custom_metrics: {}
  date: 2021-11-07_01-04-28
  done: false
  episode_len_mean: 81.7603305785124
  episode_media: {}
  episode_reward_max: 10.720000000000017
  episode_reward_mean: 5.05413223140497
  episode_reward_min: -1.0299999999999985
  episodes_this_iter: 121
  episodes_total: 3976
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.2576221572028268
          entropy_coeff: 0.01
          kl: 0.0180585897738117
          policy_loss: -0.05428455516568616
          total_loss: 0.1917799477545051
          vf_explained_var: 0.9244428873062134
          vf_loss: 0.25035640032818685
    num_agent_steps_sampled: 349860
    num_agent_steps_trained: 349860
    num_steps_sampled: 349860
    num_steps_trained: 349860
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,35,10707.9,349860,5.05413,10.72,-1.03,81.7603




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 359856
  custom_metrics: {}
  date: 2021-11-07_01-11-05
  done: false
  episode_len_mean: 84.65546218487395
  episode_media: {}
  episode_reward_max: 12.070000000000023
  episode_reward_mean: 4.510084033613457
  episode_reward_min: -1.530000000000001
  episodes_this_iter: 119
  episodes_total: 4095
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.2741868382845167
          entropy_coeff: 0.01
          kl: 0.01751549159504006
          policy_loss: -0.05436535127874878
          total_loss: 0.1805545209883115
          vf_explained_var: 0.9137751460075378
          vf_loss: 0.2399273043928238
    num_agent_steps_sampled: 359856
    num_agent_steps_trained: 359856
    num_steps_sampled: 359856
    num_steps_trained: 359856
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,36,11105.4,359856,4.51008,12.07,-1.53,84.6555




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 369852
  custom_metrics: {}
  date: 2021-11-07_01-17-51
  done: false
  episode_len_mean: 82.24793388429752
  episode_media: {}
  episode_reward_max: 12.860000000000012
  episode_reward_mean: 4.7206611570248045
  episode_reward_min: 0.12000000000000476
  episodes_this_iter: 121
  episodes_total: 4216
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.2612816981780224
          entropy_coeff: 0.01
          kl: 0.016862576518122393
          policy_loss: -0.05547959861369469
          total_loss: 0.16241388307629614
          vf_explained_var: 0.9258328676223755
          vf_loss: 0.22343293997880995
    num_agent_steps_sampled: 369852
    num_agent_steps_trained: 369852
    num_steps_sampled: 369852
    num_steps_trained: 369852
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,37,11511.1,369852,4.72066,12.86,0.12,82.2479




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 379848
  custom_metrics: {}
  date: 2021-11-07_01-24-38
  done: false
  episode_len_mean: 80.264
  episode_media: {}
  episode_reward_max: 12.55000000000002
  episode_reward_mean: 5.499760000000011
  episode_reward_min: 0.5700000000000033
  episodes_this_iter: 125
  episodes_total: 4341
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.223712661836901
          entropy_coeff: 0.01
          kl: 0.01855090029423235
          policy_loss: -0.05586306244357783
          total_loss: 0.21061393776828916
          vf_explained_var: 0.9262011647224426
          vf_loss: 0.2699313397813811
    num_agent_steps_sampled: 379848
    num_agent_steps_trained: 379848
    num_steps_sampled: 379848
    num_steps_trained: 379848
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,38,11918.2,379848,5.49976,12.55,0.57,80.264




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 389844
  custom_metrics: {}
  date: 2021-11-07_01-31-13
  done: false
  episode_len_mean: 78.1259842519685
  episode_media: {}
  episode_reward_max: 12.010000000000021
  episode_reward_mean: 5.283622047244105
  episode_reward_min: 0.2699999999999989
  episodes_this_iter: 127
  episodes_total: 4468
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.232018192812928
          entropy_coeff: 0.01
          kl: 0.01771654079742359
          policy_loss: -0.06035850683274942
          total_loss: 0.16173392571986486
          vf_explained_var: 0.9261177182197571
          vf_loss: 0.226474614517811
    num_agent_steps_sampled: 389844
    num_agent_steps_trained: 389844
    num_steps_sampled: 389844
    num_steps_trained: 389844
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,39,12312.8,389844,5.28362,12.01,0.27,78.126




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 399840
  custom_metrics: {}
  date: 2021-11-07_01-38-41
  done: false
  episode_len_mean: 81.0725806451613
  episode_media: {}
  episode_reward_max: 10.64000000000002
  episode_reward_mean: 4.866612903225817
  episode_reward_min: 0.02999999999999889
  episodes_this_iter: 124
  episodes_total: 4592
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.218476415088034
          entropy_coeff: 0.01
          kl: 0.017592417102627433
          policy_loss: -0.0565326711592766
          total_loss: 0.17512658311643153
          vf_explained_var: 0.9150434732437134
          vf_loss: 0.23603169456060624
    num_agent_steps_sampled: 399840
    num_agent_steps_trained: 399840
    num_steps_sampled: 399840
    num_steps_trained: 399840
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,40,12760.8,399840,4.86661,10.64,0.03,81.0726




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 409836
  custom_metrics: {}
  date: 2021-11-07_01-45-08
  done: false
  episode_len_mean: 79.76984126984127
  episode_media: {}
  episode_reward_max: 11.95000000000002
  episode_reward_mean: 4.837142857142868
  episode_reward_min: -0.2000000000000011
  episodes_this_iter: 126
  episodes_total: 4718
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.2359801929221192
          entropy_coeff: 0.01
          kl: 0.016918436235298342
          policy_loss: -0.06032231456289689
          total_loss: 0.16498443071658794
          vf_explained_var: 0.9176260232925415
          vf_loss: 0.2305366295365951
    num_agent_steps_sampled: 409836
    num_agent_steps_trained: 409836
    num_steps_sampled: 409836
    num_steps_trained: 409836
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,41,13148.3,409836,4.83714,11.95,-0.2,79.7698




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 419832
  custom_metrics: {}
  date: 2021-11-07_01-52-28
  done: false
  episode_len_mean: 75.1590909090909
  episode_media: {}
  episode_reward_max: 13.120000000000013
  episode_reward_mean: 5.256212121212131
  episode_reward_min: 0.3599999999999989
  episodes_this_iter: 132
  episodes_total: 4850
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.198389208214915
          entropy_coeff: 0.01
          kl: 0.018651370108059
          policy_loss: -0.05625665997847533
          total_loss: 0.1738084363997874
          vf_explained_var: 0.929084062576294
          vf_loss: 0.23316447551433855
    num_agent_steps_sampled: 419832
    num_agent_steps_trained: 419832
    num_steps_sampled: 419832
    num_steps_trained: 419832
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,42,13587.8,419832,5.25621,13.12,0.36,75.1591




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 429828
  custom_metrics: {}
  date: 2021-11-07_02-01-02
  done: false
  episode_len_mean: 73.74264705882354
  episode_media: {}
  episode_reward_max: 12.070000000000023
  episode_reward_mean: 5.6569117647058915
  episode_reward_min: 0.35000000000000064
  episodes_this_iter: 136
  episodes_total: 4986
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.1817500832753303
          entropy_coeff: 0.01
          kl: 0.019941017957862444
          policy_loss: -0.05459857750206422
          total_loss: 0.21031253687305074
          vf_explained_var: 0.924432098865509
          vf_loss: 0.26653833327512455
    num_agent_steps_sampled: 429828
    num_agent_steps_trained: 429828
    num_steps_sampled: 429828
    num_steps_trained: 429828
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,43,14101.9,429828,5.65691,12.07,0.35,73.7426




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 439824
  custom_metrics: {}
  date: 2021-11-07_02-08-07
  done: false
  episode_len_mean: 75.08208955223881
  episode_media: {}
  episode_reward_max: 11.86000000000002
  episode_reward_mean: 5.253432835820906
  episode_reward_min: -1.7200000000000009
  episodes_this_iter: 134
  episodes_total: 5120
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.188707156874176
          entropy_coeff: 0.01
          kl: 0.016769512826769153
          policy_loss: -0.05890241562524158
          total_loss: 0.14853591298222796
          vf_explained_var: 0.9375755190849304
          vf_loss: 0.2123462670060814
    num_agent_steps_sampled: 439824
    num_agent_steps_trained: 439824
    num_steps_sampled: 439824
    num_steps_trained: 439824
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,44,14526.7,439824,5.25343,11.86,-1.72,75.0821




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 449820
  custom_metrics: {}
  date: 2021-11-07_02-16-31
  done: false
  episode_len_mean: 75.91472868217055
  episode_media: {}
  episode_reward_max: 11.910000000000021
  episode_reward_mean: 5.163100775193809
  episode_reward_min: 0.22999999999999896
  episodes_this_iter: 129
  episodes_total: 5249
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.1766435366410475
          entropy_coeff: 0.01
          kl: 0.018786932462139694
          policy_loss: -0.05711398767944202
          total_loss: 0.18857668915795336
          vf_explained_var: 0.9247210025787354
          vf_loss: 0.24843534149038485
    num_agent_steps_sampled: 449820
    num_agent_steps_trained: 449820
    num_steps_sampled: 449820
    num_steps_trained: 449820
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,45,15031.2,449820,5.1631,11.91,0.23,75.9147




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 459816
  custom_metrics: {}
  date: 2021-11-07_02-25-34
  done: false
  episode_len_mean: 68.66666666666667
  episode_media: {}
  episode_reward_max: 13.070000000000013
  episode_reward_mean: 5.677891156462594
  episode_reward_min: 0.3599999999999991
  episodes_this_iter: 147
  episodes_total: 5396
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.1379926694764033
          entropy_coeff: 0.01
          kl: 0.017963978835499658
          policy_loss: -0.05780736955089701
          total_loss: 0.1855272352257664
          vf_explained_var: 0.9402636885643005
          vf_loss: 0.2465260014988673
    num_agent_steps_sampled: 459816
    num_agent_steps_trained: 459816
    num_steps_sampled: 459816
    num_steps_trained: 459816
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,46,15573.6,459816,5.67789,13.07,0.36,68.6667




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 469812
  custom_metrics: {}
  date: 2021-11-07_02-32-53
  done: false
  episode_len_mean: 74.78358208955224
  episode_media: {}
  episode_reward_max: 12.32000000000002
  episode_reward_mean: 4.88768656716419
  episode_reward_min: 0.6099999999999998
  episodes_this_iter: 134
  episodes_total: 5530
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.206125971598503
          entropy_coeff: 0.01
          kl: 0.016196799383156296
          policy_loss: -0.06281006268551971
          total_loss: 0.12652334021006384
          vf_explained_var: 0.938781201839447
          vf_loss: 0.1949954021562878
    num_agent_steps_sampled: 469812
    num_agent_steps_trained: 469812
    num_steps_sampled: 469812
    num_steps_trained: 469812
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,47,16013,469812,4.88769,12.32,0.61,74.7836




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 479808
  custom_metrics: {}
  date: 2021-11-07_02-42-06
  done: false
  episode_len_mean: 71.00714285714285
  episode_media: {}
  episode_reward_max: 13.020000000000012
  episode_reward_mean: 5.862785714285724
  episode_reward_min: 0.34000000000000186
  episodes_this_iter: 140
  episodes_total: 5670
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.1313935042446497
          entropy_coeff: 0.01
          kl: 0.018530758908821852
          policy_loss: -0.05709982999942751
          total_loss: 0.20476613521066486
          vf_explained_var: 0.9411544799804688
          vf_loss: 0.2644175054553228
    num_agent_steps_sampled: 479808
    num_agent_steps_trained: 479808
    num_steps_sampled: 479808
    num_steps_trained: 479808
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,48,16566.3,479808,5.86279,13.02,0.34,71.0071




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 489804
  custom_metrics: {}
  date: 2021-11-07_02-49-44
  done: false
  episode_len_mean: 73.28467153284672
  episode_media: {}
  episode_reward_max: 13.060000000000015
  episode_reward_mean: 5.442627737226286
  episode_reward_min: 0.520000000000001
  episodes_this_iter: 137
  episodes_total: 5807
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.1557112091626878
          entropy_coeff: 0.01
          kl: 0.018542158807555446
          policy_loss: -0.05727011611064275
          total_loss: 0.20070196683239988
          vf_explained_var: 0.9295142889022827
          vf_loss: 0.26075525741355543
    num_agent_steps_sampled: 489804
    num_agent_steps_trained: 489804
    num_steps_sampled: 489804
    num_steps_trained: 489804
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,49,17023.6,489804,5.44263,13.06,0.52,73.2847




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 499800
  custom_metrics: {}
  date: 2021-11-07_02-57-56
  done: false
  episode_len_mean: 72.6231884057971
  episode_media: {}
  episode_reward_max: 13.090000000000012
  episode_reward_mean: 5.495217391304357
  episode_reward_min: 0.4499999999999993
  episodes_this_iter: 138
  episodes_total: 5945
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 0.00010000000000000002
          entropy: 2.123642136398544
          entropy_coeff: 0.01
          kl: 0.02002683255329158
          policy_loss: -0.05435470100173838
          total_loss: 0.2584059531084047
          vf_explained_var: 0.9274259805679321
          vf_loss: 0.313719906578334
    num_agent_steps_sampled: 499800
    num_agent_steps_trained: 499800
    num_steps_sampled: 499800
    num_steps_trained: 499800
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,50,17516.2,499800,5.49522,13.09,0.45,72.6232




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 509796
  custom_metrics: {}
  date: 2021-11-07_03-05-24
  done: false
  episode_len_mean: 75.59398496240601
  episode_media: {}
  episode_reward_max: 11.98000000000002
  episode_reward_mean: 5.302556390977452
  episode_reward_min: 9.246076126956382e-16
  episodes_this_iter: 133
  episodes_total: 6078
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 2.184232100360414
          entropy_coeff: 0.01
          kl: 0.013720731648978043
          policy_loss: -0.05756957553223603
          total_loss: 0.17248690475064973
          vf_explained_var: 0.9402358531951904
          vf_loss: 0.23106044084470495
    num_agent_steps_sampled: 509796
    num_agent_steps_trained: 509796
    num_steps_sampled: 509796
    num_steps_trained: 509796


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,51,17963.8,509796,5.30256,11.98,9.24608e-16,75.594




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 519792
  custom_metrics: {}
  date: 2021-11-07_03-12-57
  done: false
  episode_len_mean: 74.62406015037594
  episode_media: {}
  episode_reward_max: 12.990000000000013
  episode_reward_mean: 5.658195488721813
  episode_reward_min: 0.29999999999999905
  episodes_this_iter: 133
  episodes_total: 6211
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 2.147796976770091
          entropy_coeff: 0.01
          kl: 0.015107283735260758
          policy_loss: -0.060471870410295885
          total_loss: 0.1546619632615684
          vf_explained_var: 0.9434837698936462
          vf_loss: 0.21366761673210014
    num_agent_steps_sampled: 519792
    num_agent_steps_trained: 519792
    num_steps_sampled: 519792
    num_steps_trained: 519792
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,52,18416.3,519792,5.6582,12.99,0.3,74.6241




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 529788
  custom_metrics: {}
  date: 2021-11-07_03-20-24
  done: false
  episode_len_mean: 73.55474452554745
  episode_media: {}
  episode_reward_max: 12.880000000000015
  episode_reward_mean: 5.5215328467153375
  episode_reward_min: -0.1300000000000011
  episodes_this_iter: 137
  episodes_total: 6348
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 2.1543233970291595
          entropy_coeff: 0.01
          kl: 0.01495507324912447
          policy_loss: -0.05854102731960961
          total_loss: 0.184578550245581
          vf_explained_var: 0.9299817085266113
          vf_loss: 0.24194979367729946
    num_agent_steps_sampled: 529788
    num_agent_steps_trained: 529788
    num_steps_sampled: 529788
    num_steps_trained: 529788
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,53,18863.8,529788,5.52153,12.88,-0.13,73.5547




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 539784
  custom_metrics: {}
  date: 2021-11-07_03-29-16
  done: false
  episode_len_mean: 70.0
  episode_media: {}
  episode_reward_max: 12.920000000000014
  episode_reward_mean: 5.888156028368804
  episode_reward_min: 0.41999999999999915
  episodes_this_iter: 141
  episodes_total: 6489
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 2.1293531532980436
          entropy_coeff: 0.01
          kl: 0.014746773751911083
          policy_loss: -0.05581888242935141
          total_loss: 0.19435993493335624
          vf_explained_var: 0.9347234964370728
          vf_loss: 0.24907568621520812
    num_agent_steps_sampled: 539784
    num_agent_steps_trained: 539784
    num_steps_sampled: 539784
    num_steps_trained: 539784
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,54,19395.3,539784,5.88816,12.92,0.42,70




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 549780
  custom_metrics: {}
  date: 2021-11-07_03-37-18
  done: false
  episode_len_mean: 74.15441176470588
  episode_media: {}
  episode_reward_max: 13.030000000000014
  episode_reward_mean: 5.426838235294127
  episode_reward_min: -1.0799999999999996
  episodes_this_iter: 136
  episodes_total: 6625
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 2.1824990317352815
          entropy_coeff: 0.01
          kl: 0.015149984567977462
          policy_loss: -0.05730666191452462
          total_loss: 0.20101420485900126
          vf_explained_var: 0.9329208135604858
          vf_loss: 0.25713681807248
    num_agent_steps_sampled: 549780
    num_agent_steps_trained: 549780
    num_steps_sampled: 549780
    num_steps_trained: 549780
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,55,19878.1,549780,5.42684,13.03,-1.08,74.1544




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 559776
  custom_metrics: {}
  date: 2021-11-07_03-47-59
  done: false
  episode_len_mean: 64.89542483660131
  episode_media: {}
  episode_reward_max: 13.070000000000013
  episode_reward_mean: 5.716993464052296
  episode_reward_min: 0.39999999999999913
  episodes_this_iter: 153
  episodes_total: 6778
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 2.1170341002635467
          entropy_coeff: 0.01
          kl: 0.012630008153233814
          policy_loss: -0.05912718756410938
          total_loss: 0.10910919333020082
          vf_explained_var: 0.9582887887954712
          vf_loss: 0.17022489583931671
    num_agent_steps_sampled: 559776
    num_agent_steps_trained: 559776
    num_steps_sampled: 559776
    num_steps_trained: 559776


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,56,20518.6,559776,5.71699,13.07,0.4,64.8954




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 569772
  custom_metrics: {}
  date: 2021-11-07_03-56-20
  done: false
  episode_len_mean: 72.35507246376811
  episode_media: {}
  episode_reward_max: 13.74000000000002
  episode_reward_mean: 5.955797101449284
  episode_reward_min: 0.5600000000000022
  episodes_this_iter: 138
  episodes_total: 6916
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 2.0832734926134093
          entropy_coeff: 0.01
          kl: 0.01619218702371733
          policy_loss: -0.06048015007255678
          total_loss: 0.19880340004132854
          vf_explained_var: 0.943726658821106
          vf_loss: 0.2555244015545672
    num_agent_steps_sampled: 569772
    num_agent_steps_trained: 569772
    num_steps_sampled: 569772
    num_steps_trained: 569772
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,57,21019.1,569772,5.9558,13.74,0.56,72.3551




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 579768
  custom_metrics: {}
  date: 2021-11-07_04-03-21
  done: false
  episode_len_mean: 74.57777777777778
  episode_media: {}
  episode_reward_max: 13.020000000000014
  episode_reward_mean: 5.8519259259259355
  episode_reward_min: 0.4000000000000037
  episodes_this_iter: 135
  episodes_total: 7051
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 2.086603338901813
          entropy_coeff: 0.01
          kl: 0.015606230455600011
          policy_loss: -0.05773399359602322
          total_loss: 0.1781271454718951
          vf_explained_var: 0.9526033401489258
          vf_loss: 0.2330252098858866
    num_agent_steps_sampled: 579768
    num_agent_steps_trained: 579768
    num_steps_sampled: 579768
    num_steps_trained: 579768
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,58,21440.4,579768,5.85193,13.02,0.4,74.5778




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 589764
  custom_metrics: {}
  date: 2021-11-07_04-10-22
  done: false
  episode_len_mean: 80.31451612903226
  episode_media: {}
  episode_reward_max: 13.570000000000027
  episode_reward_mean: 6.010322580645172
  episode_reward_min: 0.3800000000000061
  episodes_this_iter: 124
  episodes_total: 7175
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 2.0773479437216733
          entropy_coeff: 0.01
          kl: 0.015073753350121018
          policy_loss: -0.05926216471876599
          total_loss: 0.21770645023251955
          vf_explained_var: 0.9392252564430237
          vf_loss: 0.27484883235560525
    num_agent_steps_sampled: 589764
    num_agent_steps_trained: 589764
    num_steps_sampled: 589764
    num_steps_trained: 589764
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,59,21861.8,589764,6.01032,13.57,0.38,80.3145




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 599760
  custom_metrics: {}
  date: 2021-11-07_04-18-42
  done: false
  episode_len_mean: 71.07801418439716
  episode_media: {}
  episode_reward_max: 13.970000000000024
  episode_reward_mean: 5.74304964539008
  episode_reward_min: 0.4200000000000065
  episodes_this_iter: 141
  episodes_total: 7316
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 2.065147665117541
          entropy_coeff: 0.01
          kl: 0.01415158046828657
          policy_loss: -0.0639263820508097
          total_loss: 0.14867187658340758
          vf_explained_var: 0.950508713722229
          vf_loss: 0.21175702313900505
    num_agent_steps_sampled: 599760
    num_agent_steps_trained: 599760
    num_steps_sampled: 599760
    num_steps_trained: 599760
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,60,22361.4,599760,5.74305,13.97,0.42,71.078




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 609756
  custom_metrics: {}
  date: 2021-11-07_04-26-15
  done: false
  episode_len_mean: 70.91489361702128
  episode_media: {}
  episode_reward_max: 13.190000000000012
  episode_reward_mean: 5.713546099290789
  episode_reward_min: -1.3800000000000008
  episodes_this_iter: 141
  episodes_total: 7457
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 2.07827275329166
          entropy_coeff: 0.01
          kl: 0.014631924886278371
          policy_loss: -0.05595487758326225
          total_loss: 0.15684442416422514
          vf_explained_var: 0.9471813440322876
          vf_loss: 0.21135979357820292
    num_agent_steps_sampled: 609756
    num_agent_steps_trained: 609756
    num_steps_sampled: 609756
    num_steps_trained: 609756
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,61,22814.5,609756,5.71355,13.19,-1.38,70.9149




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 619752
  custom_metrics: {}
  date: 2021-11-07_04-36-56
  done: false
  episode_len_mean: 67.02684563758389
  episode_media: {}
  episode_reward_max: 13.080000000000013
  episode_reward_mean: 6.310469798657726
  episode_reward_min: 0.5600000000000007
  episodes_this_iter: 149
  episodes_total: 7606
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 2.0148856381065827
          entropy_coeff: 0.01
          kl: 0.014510917330608048
          policy_loss: -0.059543824448990514
          total_loss: 0.14756621196468034
          vf_explained_var: 0.9598572254180908
          vf_loss: 0.20522043791910013
    num_agent_steps_sampled: 619752
    num_agent_steps_trained: 619752
    num_steps_sampled: 619752
    num_steps_trained: 619752


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,62,23455.9,619752,6.31047,13.08,0.56,67.0268




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 629748
  custom_metrics: {}
  date: 2021-11-07_04-44-39
  done: false
  episode_len_mean: 73.19117647058823
  episode_media: {}
  episode_reward_max: 13.310000000000008
  episode_reward_mean: 5.805588235294128
  episode_reward_min: 0.5299999999999989
  episodes_this_iter: 136
  episodes_total: 7742
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 2.0679812247936544
          entropy_coeff: 0.01
          kl: 0.014257522653780675
          policy_loss: -0.05835134333524949
          total_loss: 0.1583787517454953
          vf_explained_var: 0.9553336501121521
          vf_loss: 0.21575629451654404
    num_agent_steps_sampled: 629748
    num_agent_steps_trained: 629748
    num_steps_sampled: 629748
    num_steps_trained: 629748
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,63,23918.1,629748,5.80559,13.31,0.53,73.1912




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 639744
  custom_metrics: {}
  date: 2021-11-07_04-52-53
  done: false
  episode_len_mean: 71.23404255319149
  episode_media: {}
  episode_reward_max: 13.710000000000024
  episode_reward_mean: 6.054468085106393
  episode_reward_min: 0.5100000000000005
  episodes_this_iter: 141
  episodes_total: 7883
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 2.0455455791237007
          entropy_coeff: 0.01
          kl: 0.014829667197812058
          policy_loss: -0.06470662058195752
          total_loss: 0.13791461448368225
          vf_explained_var: 0.9575884938240051
          vf_loss: 0.2005541337096793
    num_agent_steps_sampled: 639744
    num_agent_steps_trained: 639744
    num_steps_sampled: 639744
    num_steps_trained: 639744
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,64,24412.6,639744,6.05447,13.71,0.51,71.234




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 649740
  custom_metrics: {}
  date: 2021-11-07_04-59-22
  done: false
  episode_len_mean: 76.9076923076923
  episode_media: {}
  episode_reward_max: 13.21000000000001
  episode_reward_mean: 5.941307692307704
  episode_reward_min: 0.8900000000000031
  episodes_this_iter: 130
  episodes_total: 8013
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 2.0650990329237064
          entropy_coeff: 0.01
          kl: 0.01635077545418801
          policy_loss: -0.059298238640603344
          total_loss: 0.167563269718781
          vf_explained_var: 0.9450250864028931
          vf_loss: 0.2226797594808233
    num_agent_steps_sampled: 649740
    num_agent_steps_trained: 649740
    num_steps_sampled: 649740
    num_steps_trained: 649740
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,65,24801.6,649740,5.94131,13.21,0.89,76.9077




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 659736
  custom_metrics: {}
  date: 2021-11-07_05-09-11
  done: false
  episode_len_mean: 67.33561643835617
  episode_media: {}
  episode_reward_max: 13.060000000000013
  episode_reward_mean: 5.886301369863022
  episode_reward_min: 0.500000000000001
  episodes_this_iter: 146
  episodes_total: 8159
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 2.0626715789493333
          entropy_coeff: 0.01
          kl: 0.016470627530574725
          policy_loss: -0.0585150111761167
          total_loss: 0.19835696270466488
          vf_explained_var: 0.9453590512275696
          vf_loss: 0.2524839243636681
    num_agent_steps_sampled: 659736
    num_agent_steps_trained: 659736
    num_steps_sampled: 659736
    num_steps_trained: 659736
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,66,25390.3,659736,5.8863,13.06,0.5,67.3356




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 669732
  custom_metrics: {}
  date: 2021-11-07_05-16-44
  done: false
  episode_len_mean: 76.47368421052632
  episode_media: {}
  episode_reward_max: 13.200000000000012
  episode_reward_mean: 6.063007518797003
  episode_reward_min: 0.3099999999999987
  episodes_this_iter: 133
  episodes_total: 8292
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 2.0595217612054615
          entropy_coeff: 0.01
          kl: 0.014961483305963556
          policy_loss: -0.05930158668794693
          total_loss: 0.1801415704127051
          vf_explained_var: 0.9449120759963989
          vf_loss: 0.23731562206760431
    num_agent_steps_sampled: 669732
    num_agent_steps_trained: 669732
    num_steps_sampled: 669732
    num_steps_trained: 669732
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,67,25843.7,669732,6.06301,13.2,0.31,76.4737




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 679728
  custom_metrics: {}
  date: 2021-11-07_05-24-12
  done: false
  episode_len_mean: 72.13868613138686
  episode_media: {}
  episode_reward_max: 13.110000000000012
  episode_reward_mean: 6.051386861313878
  episode_reward_min: 0.35000000000000797
  episodes_this_iter: 137
  episodes_total: 8429
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 2.0519357175908537
          entropy_coeff: 0.01
          kl: 0.016708622636138846
          policy_loss: -0.05805128393296757
          total_loss: 0.21319802137505678
          vf_explained_var: 0.9436988830566406
          vf_loss: 0.26639244213827656
    num_agent_steps_sampled: 679728
    num_agent_steps_trained: 679728
    num_steps_sampled: 679728
    num_steps_trained: 679728


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,68,26291.5,679728,6.05139,13.11,0.35,72.1387




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 689724
  custom_metrics: {}
  date: 2021-11-07_05-30-56
  done: false
  episode_len_mean: 75.21641791044776
  episode_media: {}
  episode_reward_max: 15.15000000000001
  episode_reward_mean: 6.491194029850756
  episode_reward_min: 0.6999999999999988
  episodes_this_iter: 134
  episodes_total: 8563
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 2.017735736696129
          entropy_coeff: 0.01
          kl: 0.015039710078622417
          policy_loss: -0.06184021901562173
          total_loss: 0.18445522214134788
          vf_explained_var: 0.9520906209945679
          vf_loss: 0.2436312386368075
    num_agent_steps_sampled: 689724
    num_agent_steps_trained: 689724
    num_steps_sampled: 689724
    num_steps_trained: 689724
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,69,26695.4,689724,6.49119,15.15,0.7,75.2164




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 699720
  custom_metrics: {}
  date: 2021-11-07_05-43-30
  done: false
  episode_len_mean: 65.375
  episode_media: {}
  episode_reward_max: 14.100000000000023
  episode_reward_mean: 6.317565789473693
  episode_reward_min: 0.8000000000000095
  episodes_this_iter: 152
  episodes_total: 8715
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 2.027845080082233
          entropy_coeff: 0.01
          kl: 0.014837833754057108
          policy_loss: -0.06341650301956722
          total_loss: 0.1444003950438311
          vf_explained_var: 0.9567330479621887
          vf_loss: 0.2055603910333071
    num_agent_steps_sampled: 699720
    num_agent_steps_trained: 699720
    num_steps_sampled: 699720
    num_steps_trained: 699720
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,70,27449.2,699720,6.31757,14.1,0.8,65.375




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 709716
  custom_metrics: {}
  date: 2021-11-07_05-50-24
  done: false
  episode_len_mean: 75.56390977443608
  episode_media: {}
  episode_reward_max: 13.95000000000002
  episode_reward_mean: 6.371578947368432
  episode_reward_min: 0.6100000000000093
  episodes_this_iter: 133
  episodes_total: 8848
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 2.0326666622080354
          entropy_coeff: 0.01
          kl: 0.018349509949399854
          policy_loss: -0.05661863929982114
          total_loss: 0.21147908860674272
          vf_explained_var: 0.9494695663452148
          vf_loss: 0.26055607710702294
    num_agent_steps_sampled: 709716
    num_agent_steps_trained: 709716
    num_steps_sampled: 709716
    num_steps_trained: 709716
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,71,27862.9,709716,6.37158,13.95,0.61,75.5639




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 719712
  custom_metrics: {}
  date: 2021-11-07_05-59-54
  done: false
  episode_len_mean: 70.47183098591549
  episode_media: {}
  episode_reward_max: 13.070000000000014
  episode_reward_mean: 6.035985915492967
  episode_reward_min: 0.559999999999999
  episodes_this_iter: 142
  episodes_total: 8990
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 2.0442884862932384
          entropy_coeff: 0.01
          kl: 0.013847421799084403
          policy_loss: -0.057346197774904406
          total_loss: 0.14290183069916745
          vf_explained_var: 0.9514022469520569
          vf_loss: 0.19966014173104724
    num_agent_steps_sampled: 719712
    num_agent_steps_trained: 719712
    num_steps_sampled: 719712
    num_steps_trained: 719712
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,72,28432.9,719712,6.03599,13.07,0.56,70.4718




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 729708
  custom_metrics: {}
  date: 2021-11-07_06-07-20
  done: false
  episode_len_mean: 72.39855072463769
  episode_media: {}
  episode_reward_max: 13.100000000000012
  episode_reward_mean: 6.306739130434793
  episode_reward_min: 0.6800000000000029
  episodes_this_iter: 138
  episodes_total: 9128
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 2.0221513775678783
          entropy_coeff: 0.01
          kl: 0.015045133979372174
          policy_loss: -0.06485373360924741
          total_loss: 0.13336940954407503
          vf_explained_var: 0.9604810476303101
          vf_loss: 0.1955948592028302
    num_agent_steps_sampled: 729708
    num_agent_steps_trained: 729708
    num_steps_sampled: 729708
    num_steps_trained: 729708
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,73,28879.5,729708,6.30674,13.1,0.68,72.3986




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 739704
  custom_metrics: {}
  date: 2021-11-07_06-15-49
  done: false
  episode_len_mean: 73.28676470588235
  episode_media: {}
  episode_reward_max: 13.32000000000001
  episode_reward_mean: 6.845441176470598
  episode_reward_min: 0.7200000000000035
  episodes_this_iter: 136
  episodes_total: 9264
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.9848543194624093
          entropy_coeff: 0.01
          kl: 0.01571277837139209
          policy_loss: -0.06422443947094118
          total_loss: 0.14874132757719893
          vf_explained_var: 0.9611971974372864
          vf_loss: 0.20895052884315324
    num_agent_steps_sampled: 739704
    num_agent_steps_trained: 739704
    num_steps_sampled: 739704
    num_steps_trained: 739704
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,74,29388.2,739704,6.84544,13.32,0.72,73.2868




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 749700
  custom_metrics: {}
  date: 2021-11-07_06-24-54
  done: false
  episode_len_mean: 70.37588652482269
  episode_media: {}
  episode_reward_max: 13.880000000000024
  episode_reward_mean: 6.0092198581560385
  episode_reward_min: 0.4200000000000055
  episodes_this_iter: 141
  episodes_total: 9405
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 2.0351844447290794
          entropy_coeff: 0.01
          kl: 0.015437402887187212
          policy_loss: -0.06377475223798527
          total_loss: 0.14158018480460996
          vf_explained_var: 0.9570084810256958
          vf_loss: 0.20226122636156968
    num_agent_steps_sampled: 749700
    num_agent_steps_trained: 749700
    num_steps_sampled: 749700
    num_steps_trained: 749700


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,75,29933.4,749700,6.00922,13.88,0.42,70.3759




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 759696
  custom_metrics: {}
  date: 2021-11-07_06-36-54
  done: false
  episode_len_mean: 67.70270270270271
  episode_media: {}
  episode_reward_max: 13.140000000000013
  episode_reward_mean: 6.3937837837837925
  episode_reward_min: 0.5999999999999989
  episodes_this_iter: 148
  episodes_total: 9553
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 2.0181184688185017
          entropy_coeff: 0.01
          kl: 0.016714106079691152
          policy_loss: -0.061238409956105244
          total_loss: 0.16378510009783964
          vf_explained_var: 0.9603580236434937
          vf_loss: 0.2198201487175165
    num_agent_steps_sampled: 759696
    num_agent_steps_trained: 759696
    num_steps_sampled: 759696
    num_steps_trained: 759696


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,76,30653.3,759696,6.39378,13.14,0.6,67.7027




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 769692
  custom_metrics: {}
  date: 2021-11-07_06-46-31
  done: false
  episode_len_mean: 70.20138888888889
  episode_media: {}
  episode_reward_max: 13.080000000000013
  episode_reward_mean: 6.46861111111112
  episode_reward_min: 0.670000000000014
  episodes_this_iter: 144
  episodes_total: 9697
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.9864911590885912
          entropy_coeff: 0.01
          kl: 0.01571242278040411
          policy_loss: -0.064329983594899
          total_loss: 0.15412023746782644
          vf_explained_var: 0.958391547203064
          vf_loss: 0.2144518914226538
    num_agent_steps_sampled: 769692
    num_agent_steps_trained: 769692
    num_steps_sampled: 769692
    num_steps_trained: 769692
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,77,31229.8,769692,6.46861,13.08,0.67,70.2014




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 779688
  custom_metrics: {}
  date: 2021-11-07_06-55-52
  done: false
  episode_len_mean: 69.34027777777777
  episode_media: {}
  episode_reward_max: 13.130000000000013
  episode_reward_mean: 6.236875000000008
  episode_reward_min: 0.869999999999999
  episodes_this_iter: 144
  episodes_total: 9841
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.997704538422772
          entropy_coeff: 0.01
          kl: 0.017053657565771513
          policy_loss: -0.06176203147818645
          total_loss: 0.1799248424099965
          vf_explained_var: 0.9562793970108032
          vf_loss: 0.23576367799606587
    num_agent_steps_sampled: 779688
    num_agent_steps_trained: 779688
    num_steps_sampled: 779688
    num_steps_trained: 779688
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,78,31790.7,779688,6.23688,13.13,0.87,69.3403




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 789684
  custom_metrics: {}
  date: 2021-11-07_07-05-13
  done: false
  episode_len_mean: 68.54482758620689
  episode_media: {}
  episode_reward_max: 13.160000000000013
  episode_reward_mean: 6.384344827586216
  episode_reward_min: 0.5500000000000067
  episodes_this_iter: 145
  episodes_total: 9986
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.9944460865778801
          entropy_coeff: 0.01
          kl: 0.017287281596030653
          policy_loss: -0.06078482551468361
          total_loss: 0.16440952511456533
          vf_explained_var: 0.9617472290992737
          vf_loss: 0.21888375219562625
    num_agent_steps_sampled: 789684
    num_agent_steps_trained: 789684
    num_steps_sampled: 789684
    num_steps_trained: 789684
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,79,32352,789684,6.38434,13.16,0.55,68.5448




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 799680
  custom_metrics: {}
  date: 2021-11-07_07-14-18
  done: false
  episode_len_mean: 70.60714285714286
  episode_media: {}
  episode_reward_max: 13.15000000000001
  episode_reward_mean: 5.831142857142866
  episode_reward_min: 0.6900000000000023
  episodes_this_iter: 140
  episodes_total: 10126
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 2.009293565281436
          entropy_coeff: 0.01
          kl: 0.014907546337934575
          policy_loss: -0.06759799011370056
          total_loss: 0.12113507744402457
          vf_explained_var: 0.9606860280036926
          vf_loss: 0.1861851683411843
    num_agent_steps_sampled: 799680
    num_agent_steps_trained: 799680
    num_steps_sampled: 799680
    num_steps_trained: 799680
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,80,32897.3,799680,5.83114,13.15,0.69,70.6071




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 809676
  custom_metrics: {}
  date: 2021-11-07_07-23-07
  done: false
  episode_len_mean: 72.19858156028369
  episode_media: {}
  episode_reward_max: 13.190000000000012
  episode_reward_mean: 6.286950354609939
  episode_reward_min: 0.5200000000000002
  episodes_this_iter: 141
  episodes_total: 10267
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.9811907990365967
          entropy_coeff: 0.01
          kl: 0.016709491011088106
          policy_loss: -0.06133965336454984
          total_loss: 0.1564413134382767
          vf_explained_var: 0.9529363512992859
          vf_loss: 0.21221533631348713
    num_agent_steps_sampled: 809676
    num_agent_steps_trained: 809676
    num_steps_sampled: 809676
    num_steps_trained: 809676
 



Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,81,33426.3,809676,6.28695,13.19,0.52,72.1986




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 819672
  custom_metrics: {}
  date: 2021-11-07_07-29-45
  done: false
  episode_len_mean: 75.28787878787878
  episode_media: {}
  episode_reward_max: 13.800000000000022
  episode_reward_mean: 6.636212121212132
  episode_reward_min: 0.7900000000000006
  episodes_this_iter: 132
  episodes_total: 10399
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.9749023978526776
          entropy_coeff: 0.01
          kl: 0.01632130648277102
          policy_loss: -0.05893709902993889
          total_loss: 0.1581075451336801
          vf_explained_var: 0.9612787961959839
          vf_loss: 0.21200568498645583
    num_agent_steps_sampled: 819672
    num_agent_steps_trained: 819672
    num_steps_sampled: 819672
    num_steps_trained: 819672
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,82,33823.3,819672,6.63621,13.8,0.79,75.2879




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 829668
  custom_metrics: {}
  date: 2021-11-07_07-36-39
  done: false
  episode_len_mean: 78.85826771653544
  episode_media: {}
  episode_reward_max: 13.200000000000012
  episode_reward_mean: 6.640551181102375
  episode_reward_min: 0.579999999999999
  episodes_this_iter: 127
  episodes_total: 10526
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 2.0066117818538958
          entropy_coeff: 0.01
          kl: 0.016956250926716026
          policy_loss: -0.06461293478138172
          total_loss: 0.15911679512056975
          vf_explained_var: 0.9582443237304688
          vf_loss: 0.21804354205791257
    num_agent_steps_sampled: 829668
    num_agent_steps_trained: 829668
    num_steps_sampled: 829668
    num_steps_trained: 829668
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,83,34237.6,829668,6.64055,13.2,0.58,78.8583




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 839664
  custom_metrics: {}
  date: 2021-11-07_07-43-59
  done: false
  episode_len_mean: 71.35460992907801
  episode_media: {}
  episode_reward_max: 14.04000000000002
  episode_reward_mean: 6.558865248226961
  episode_reward_min: 0.16000000000000555
  episodes_this_iter: 141
  episodes_total: 10667
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.9579111519022885
          entropy_coeff: 0.01
          kl: 0.01695582814538251
          policy_loss: -0.06111923364174162
          total_loss: 0.1828127825521251
          vf_explained_var: 0.9537572860717773
          vf_loss: 0.23775946395392092
    num_agent_steps_sampled: 839664
    num_agent_steps_trained: 839664
    num_steps_sampled: 839664
    num_steps_trained: 839664
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,84,34678.2,839664,6.55887,14.04,0.16,71.3546




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 849660
  custom_metrics: {}
  date: 2021-11-07_07-53-19
  done: false
  episode_len_mean: 68.8041958041958
  episode_media: {}
  episode_reward_max: 14.00000000000002
  episode_reward_mean: 6.306083916083925
  episode_reward_min: 0.6300000000000052
  episodes_this_iter: 143
  episodes_total: 10810
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.96047997321838
          entropy_coeff: 0.01
          kl: 0.014177995822908095
          policy_loss: -0.06718892644231136
          total_loss: 0.09107257793617682
          vf_explained_var: 0.9667494893074036
          vf_loss: 0.15633347346328008
    num_agent_steps_sampled: 849660
    num_agent_steps_trained: 849660
    num_steps_sampled: 849660
    num_steps_trained: 849660
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,85,35237.4,849660,6.30608,14,0.63,68.8042




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 859656
  custom_metrics: {}
  date: 2021-11-07_08-01-42
  done: false
  episode_len_mean: 76.74809160305344
  episode_media: {}
  episode_reward_max: 13.230000000000011
  episode_reward_mean: 6.494198473282454
  episode_reward_min: 1.0200000000000014
  episodes_this_iter: 131
  episodes_total: 10941
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.9757847873573628
          entropy_coeff: 0.01
          kl: 0.015107521423125398
          policy_loss: -0.06448835027523529
          total_loss: 0.11107624743897945
          vf_explained_var: 0.9666306972503662
          vf_loss: 0.17237789776717496
    num_agent_steps_sampled: 859656
    num_agent_steps_trained: 859656
    num_steps_sampled: 859656
    num_steps_trained: 859656


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,86,35740.8,859656,6.4942,13.23,1.02,76.7481




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 869652
  custom_metrics: {}
  date: 2021-11-07_08-09-25
  done: false
  episode_len_mean: 74.23703703703704
  episode_media: {}
  episode_reward_max: 13.180000000000012
  episode_reward_mean: 6.590296296296307
  episode_reward_min: 0.5000000000000036
  episodes_this_iter: 135
  episodes_total: 11076
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.9692386649612688
          entropy_coeff: 0.01
          kl: 0.018029406041482083
          policy_loss: -0.06446195144174445
          total_loss: 0.16942761579098609
          vf_explained_var: 0.9513214826583862
          vf_loss: 0.22619979348129188
    num_agent_steps_sampled: 869652
    num_agent_steps_trained: 869652
    num_steps_sampled: 869652
    num_steps_trained: 869652


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,87,36203.8,869652,6.5903,13.18,0.5,74.237




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 879648
  custom_metrics: {}
  date: 2021-11-07_08-19-44
  done: false
  episode_len_mean: 66.86
  episode_media: {}
  episode_reward_max: 13.160000000000013
  episode_reward_mean: 6.598133333333343
  episode_reward_min: 0.5200000000000062
  episodes_this_iter: 150
  episodes_total: 11226
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.9680988418750274
          entropy_coeff: 0.01
          kl: 0.015397815933490022
          policy_loss: -0.06448147247712581
          total_loss: 0.1504499674615506
          vf_explained_var: 0.9608657360076904
          vf_loss: 0.21122699549310228
    num_agent_steps_sampled: 879648
    num_agent_steps_trained: 879648
    num_steps_sampled: 879648
    num_steps_trained: 879648
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,88,36822.3,879648,6.59813,13.16,0.52,66.86




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 889644
  custom_metrics: {}
  date: 2021-11-07_08-29-29
  done: false
  episode_len_mean: 65.57236842105263
  episode_media: {}
  episode_reward_max: 13.150000000000013
  episode_reward_mean: 6.673289473684219
  episode_reward_min: 0.7799999999999992
  episodes_this_iter: 152
  episodes_total: 11378
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.916105506562779
          entropy_coeff: 0.01
          kl: 0.01672490643127442
          policy_loss: -0.06187501373565477
          total_loss: 0.1716296785367796
          vf_explained_var: 0.9582705497741699
          vf_loss: 0.22726479680237607
    num_agent_steps_sampled: 889644
    num_agent_steps_trained: 889644
    num_steps_sampled: 889644
    num_steps_trained: 889644
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,89,37407.3,889644,6.67329,13.15,0.78,65.5724




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 899640
  custom_metrics: {}
  date: 2021-11-07_08-37-01
  done: false
  episode_len_mean: 72.06474820143885
  episode_media: {}
  episode_reward_max: 13.060000000000013
  episode_reward_mean: 6.509352517985621
  episode_reward_min: 0.6900000000000038
  episodes_this_iter: 139
  episodes_total: 11517
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.9607391949392792
          entropy_coeff: 0.01
          kl: 0.015235866916613999
          policy_loss: -0.06291615349742083
          total_loss: 0.1185712705931475
          vf_explained_var: 0.9592134952545166
          vf_loss: 0.17795534312215625
    num_agent_steps_sampled: 899640
    num_agent_steps_trained: 899640
    num_steps_sampled: 899640
    num_steps_trained: 899640
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,90,37859.1,899640,6.50935,13.06,0.69,72.0647




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 909636
  custom_metrics: {}
  date: 2021-11-07_08-44-49
  done: false
  episode_len_mean: 72.32608695652173
  episode_media: {}
  episode_reward_max: 13.740000000000023
  episode_reward_mean: 6.0213043478260975
  episode_reward_min: 0.1700000000000039
  episodes_this_iter: 138
  episodes_total: 11655
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.992036510227073
          entropy_coeff: 0.01
          kl: 0.01712160928025029
          policy_loss: -0.06499715113741719
          total_loss: 0.14995474404173817
          vf_explained_var: 0.9604886770248413
          vf_loss: 0.20886881726228784
    num_agent_steps_sampled: 909636
    num_agent_steps_trained: 909636
    num_steps_sampled: 909636
    num_steps_trained: 909636
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,91,38327.1,909636,6.0213,13.74,0.17,72.3261




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 919632
  custom_metrics: {}
  date: 2021-11-07_08-52-58
  done: false
  episode_len_mean: 75.78625954198473
  episode_media: {}
  episode_reward_max: 13.140000000000013
  episode_reward_mean: 6.641908396946576
  episode_reward_min: 0.689999999999999
  episodes_this_iter: 131
  episodes_total: 11786
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.9598894487079392
          entropy_coeff: 0.01
          kl: 0.016135574731944415
          policy_loss: -0.06538624002988268
          total_loss: 0.11861908761227233
          vf_explained_var: 0.9609853029251099
          vf_loss: 0.17909831866526452
    num_agent_steps_sampled: 919632
    num_agent_steps_trained: 919632
    num_steps_sampled: 919632
    num_steps_trained: 919632
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,92,38816.1,919632,6.64191,13.14,0.69,75.7863




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 929628
  custom_metrics: {}
  date: 2021-11-07_09-01-03
  done: false
  episode_len_mean: 77.55038759689923
  episode_media: {}
  episode_reward_max: 13.140000000000015
  episode_reward_mean: 6.809069767441872
  episode_reward_min: 0.45000000000000207
  episodes_this_iter: 129
  episodes_total: 11915
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.9597204053503836
          entropy_coeff: 0.01
          kl: 0.018634866710390566
          policy_loss: -0.06305280226863857
          total_loss: 0.15792404336488655
          vf_explained_var: 0.95921391248703
          vf_loss: 0.21227234629675365
    num_agent_steps_sampled: 929628
    num_agent_steps_trained: 929628
    num_steps_sampled: 929628
    num_steps_trained: 929628
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,93,39301.6,929628,6.80907,13.14,0.45,77.5504




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 939624
  custom_metrics: {}
  date: 2021-11-07_09-09-15
  done: false
  episode_len_mean: 72.83333333333333
  episode_media: {}
  episode_reward_max: 13.150000000000013
  episode_reward_mean: 6.534275362318851
  episode_reward_min: 0.8499999999999991
  episodes_this_iter: 138
  episodes_total: 12053
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.9717351880847898
          entropy_coeff: 0.01
          kl: 0.015672205934206127
          policy_loss: -0.06620374041744786
          total_loss: 0.10661401526572613
          vf_explained_var: 0.9659060835838318
          vf_loss: 0.16873294503834002
    num_agent_steps_sampled: 939624
    num_agent_steps_trained: 939624
    num_steps_sampled: 939624
    num_steps_trained: 939624


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,94,39793.6,939624,6.53428,13.15,0.85,72.8333




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 949620
  custom_metrics: {}
  date: 2021-11-07_09-17-54
  done: false
  episode_len_mean: 74.06716417910448
  episode_media: {}
  episode_reward_max: 13.740000000000023
  episode_reward_mean: 6.8712686567164285
  episode_reward_min: 0.6099999999999989
  episodes_this_iter: 134
  episodes_total: 12187
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.9580311856718144
          entropy_coeff: 0.01
          kl: 0.017295603615489315
          policy_loss: -0.06694112633808683
          total_loss: 0.12594817188751495
          vf_explained_var: 0.9623190760612488
          vf_loss: 0.18620191252129709
    num_agent_steps_sampled: 949620
    num_agent_steps_trained: 949620
    num_steps_sampled: 949620
    num_steps_trained: 949620

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,95,40312,949620,6.87127,13.74,0.61,74.0672




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 959616
  custom_metrics: {}
  date: 2021-11-07_09-24-59
  done: false
  episode_len_mean: 76.11363636363636
  episode_media: {}
  episode_reward_max: 13.16000000000001
  episode_reward_mean: 6.6692424242424355
  episode_reward_min: 0.6300000000000137
  episodes_this_iter: 132
  episodes_total: 12319
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.96193465442739
          entropy_coeff: 0.01
          kl: 0.01739579340319158
          policy_loss: -0.0638120859893214
          total_loss: 0.1659320669583021
          vf_explained_var: 0.9567668437957764
          vf_loss: 0.22294363842751735
    num_agent_steps_sampled: 959616
    num_agent_steps_trained: 959616
    num_steps_sampled: 959616
    num_steps_trained: 959616
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,96,40736.9,959616,6.66924,13.16,0.63,76.1136




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 969612
  custom_metrics: {}
  date: 2021-11-07_09-34-19
  done: false
  episode_len_mean: 73.4014598540146
  episode_media: {}
  episode_reward_max: 13.060000000000013
  episode_reward_mean: 6.3988321167883315
  episode_reward_min: -1.270000000000001
  episodes_this_iter: 137
  episodes_total: 12456
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.9678082560881591
          entropy_coeff: 0.01
          kl: 0.015743040149504357
          policy_loss: -0.06603572685271501
          total_loss: 0.14000381818996407
          vf_explained_var: 0.9609586000442505
          vf_loss: 0.20180788488787973
    num_agent_steps_sampled: 969612
    num_agent_steps_trained: 969612
    num_steps_sampled: 969612
    num_steps_trained: 969612


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,97,41297.7,969612,6.39883,13.06,-1.27,73.4015




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 979608
  custom_metrics: {}
  date: 2021-11-07_09-43-54
  done: false
  episode_len_mean: 72.2043795620438
  episode_media: {}
  episode_reward_max: 13.120000000000013
  episode_reward_mean: 6.732919708029208
  episode_reward_min: 0.540000000000007
  episodes_this_iter: 137
  episodes_total: 12593
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.9659313230433015
          entropy_coeff: 0.01
          kl: 0.016843675070435958
          policy_loss: -0.06284683041720308
          total_loss: 0.1498360829332318
          vf_explained_var: 0.96296626329422
          vf_loss: 0.20676089494019492
    num_agent_steps_sampled: 979608
    num_agent_steps_trained: 979608
    num_steps_sampled: 979608
    num_steps_trained: 979608
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,98,41871.8,979608,6.73292,13.12,0.54,72.2044




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 989604
  custom_metrics: {}
  date: 2021-11-07_09-51-29
  done: false
  episode_len_mean: 71.96428571428571
  episode_media: {}
  episode_reward_max: 13.200000000000012
  episode_reward_mean: 6.723357142857154
  episode_reward_min: 0.4900000000000093
  episodes_this_iter: 140
  episodes_total: 12733
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.9588560407997198
          entropy_coeff: 0.01
          kl: 0.01632236647459309
          policy_loss: -0.061292891578478184
          total_loss: 0.15773566887101048
          vf_explained_var: 0.9673821926116943
          vf_loss: 0.21382752723323228
    num_agent_steps_sampled: 989604
    num_agent_steps_trained: 989604
    num_steps_sampled: 989604
    num_steps_trained: 989604


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,99,42327,989604,6.72336,13.2,0.49,71.9643




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 999600
  custom_metrics: {}
  date: 2021-11-07_09-59-27
  done: false
  episode_len_mean: 76.07692307692308
  episode_media: {}
  episode_reward_max: 13.930000000000023
  episode_reward_mean: 7.1128461538461645
  episode_reward_min: 0.28999999999999926
  episodes_this_iter: 130
  episodes_total: 12863
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.9588598445949392
          entropy_coeff: 0.01
          kl: 0.019368911758238197
          policy_loss: -0.06133038869453992
          total_loss: 0.20236434711095613
          vf_explained_var: 0.9537079930305481
          vf_loss: 0.25386680093649616
    num_agent_steps_sampled: 999600
    num_agent_steps_trained: 999600
    num_steps_sampled: 999600
    num_steps_trained: 99960

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,100,42805.1,999600,7.11285,13.93,0.29,76.0769




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 1009596
  custom_metrics: {}
  date: 2021-11-07_10-06-33
  done: false
  episode_len_mean: 75.78030303030303
  episode_media: {}
  episode_reward_max: 13.090000000000012
  episode_reward_mean: 6.453181818181829
  episode_reward_min: 0.19999999999999896
  episodes_this_iter: 132
  episodes_total: 12995
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.989652015408899
          entropy_coeff: 0.01
          kl: 0.01557601890590079
          policy_loss: -0.06704330015648953
          total_loss: 0.13293330783868193
          vf_explained_var: 0.9581604599952698
          vf_loss: 0.19621704941949783
    num_agent_steps_sampled: 1009596
    num_agent_steps_trained: 1009596
    num_steps_sampled: 1009596
    num_steps_trained: 1009

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,101,43230.8,1009596,6.45318,13.09,0.2,75.7803




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 1019592
  custom_metrics: {}
  date: 2021-11-07_10-12-49
  done: false
  episode_len_mean: 83.24793388429752
  episode_media: {}
  episode_reward_max: 13.880000000000022
  episode_reward_mean: 6.417933884297533
  episode_reward_min: 0.20999999999999908
  episodes_this_iter: 121
  episodes_total: 13116
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 2.000498284132053
          entropy_coeff: 0.01
          kl: 0.019115124078971164
          policy_loss: -0.06361597511344232
          total_loss: 0.19341323748708536
          vf_explained_var: 0.951671838760376
          vf_loss: 0.24800310086809163
    num_agent_steps_sampled: 1019592
    num_agent_steps_trained: 1019592
    num_steps_sampled: 1019592
    num_steps_trained: 1019

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,102,43607.2,1019592,6.41793,13.88,0.21,83.2479




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 1029588
  custom_metrics: {}
  date: 2021-11-07_10-21-51
  done: false
  episode_len_mean: 73.37956204379562
  episode_media: {}
  episode_reward_max: 13.23000000000001
  episode_reward_mean: 6.972116788321178
  episode_reward_min: 0.47000000000001063
  episodes_this_iter: 137
  episodes_total: 13253
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.9503994783784588
          entropy_coeff: 0.01
          kl: 0.017270510777481683
          policy_loss: -0.05977771165820523
          total_loss: 0.19015953180292605
          vf_explained_var: 0.9600978493690491
          vf_loss: 0.2432116504296915
    num_agent_steps_sampled: 1029588
    num_agent_steps_trained: 1029588
    num_steps_sampled: 1029588
    num_steps_trained: 1029

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,103,44149.6,1029588,6.97212,13.23,0.47,73.3796




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 1039584
  custom_metrics: {}
  date: 2021-11-07_10-29-49
  done: false
  episode_len_mean: 75.02255639097744
  episode_media: {}
  episode_reward_max: 13.300000000000011
  episode_reward_mean: 6.8440601503759515
  episode_reward_min: 0.37000000000000755
  episodes_this_iter: 133
  episodes_total: 13386
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.9398967401594178
          entropy_coeff: 0.01
          kl: 0.019581972094898873
          policy_loss: -0.061906063634679355
          total_loss: 0.2302894369714981
          vf_explained_var: 0.9501460790634155
          vf_loss: 0.2818543490754743
    num_agent_steps_sampled: 1039584
    num_agent_steps_trained: 1039584
    num_steps_sampled: 1039584
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,104,44627.1,1039584,6.84406,13.3,0.37,75.0226




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 1049580
  custom_metrics: {}
  date: 2021-11-07_10-38-08
  done: false
  episode_len_mean: 72.44927536231884
  episode_media: {}
  episode_reward_max: 13.120000000000013
  episode_reward_mean: 6.534637681159431
  episode_reward_min: 0.870000000000006
  episodes_this_iter: 138
  episodes_total: 13524
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.9866827788515988
          entropy_coeff: 0.01
          kl: 0.015754199450227113
          policy_loss: -0.06640081176590015
          total_loss: 0.1301200934490746
          vf_explained_var: 0.963744580745697
          vf_loss: 0.1924610428384736
    num_agent_steps_sampled: 1049580
    num_agent_steps_trained: 1049580
    num_steps_sampled: 1049580
    num_steps_trained: 1049580

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,105,45126.5,1049580,6.53464,13.12,0.87,72.4493




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 1059576
  custom_metrics: {}
  date: 2021-11-07_10-46-27
  done: false
  episode_len_mean: 75.68702290076335
  episode_media: {}
  episode_reward_max: 13.18000000000001
  episode_reward_mean: 6.5445801526717675
  episode_reward_min: 0.9800000000000046
  episodes_this_iter: 131
  episodes_total: 13655
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.9880942145983378
          entropy_coeff: 0.01
          kl: 0.018279342094082295
          policy_loss: -0.060896016058758794
          total_loss: 0.16804654910786349
          vf_explained_var: 0.9545044898986816
          vf_loss: 0.22106175719418078
    num_agent_steps_sampled: 1059576
    num_agent_steps_trained: 1059576
    num_steps_sampled: 1059576
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,106,45624.7,1059576,6.54458,13.18,0.98,75.687




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 1069572
  custom_metrics: {}
  date: 2021-11-07_10-53-55
  done: false
  episode_len_mean: 76.07575757575758
  episode_media: {}
  episode_reward_max: 13.080000000000013
  episode_reward_mean: 7.048030303030314
  episode_reward_min: 0.610000000000008
  episodes_this_iter: 132
  episodes_total: 13787
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.9486252482120807
          entropy_coeff: 0.01
          kl: 0.018821525360842504
          policy_loss: -0.06464985737688521
          total_loss: 0.21941518550818292
          vf_explained_var: 0.9493147134780884
          vf_loss: 0.27496610373640673
    num_agent_steps_sampled: 1069572
    num_agent_steps_trained: 1069572
    num_steps_sampled: 1069572
    num_steps_trained: 1069

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,107,46072.8,1069572,7.04803,13.08,0.61,76.0758




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 1079568
  custom_metrics: {}
  date: 2021-11-07_11-02-53
  done: false
  episode_len_mean: 71.15
  episode_media: {}
  episode_reward_max: 13.120000000000013
  episode_reward_mean: 6.688785714285724
  episode_reward_min: 0.3899999999999989
  episodes_this_iter: 140
  episodes_total: 13927
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.957770554530315
          entropy_coeff: 0.01
          kl: 0.0167547683698453
          policy_loss: -0.06370890694391779
          total_loss: 0.15719610650140123
          vf_explained_var: 0.9619985818862915
          vf_loss: 0.21503641551567448
    num_agent_steps_sampled: 1079568
    num_agent_steps_trained: 1079568
    num_steps_sampled: 1079568
    num_steps_trained: 1079568
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,108,46611.2,1079568,6.68879,13.12,0.39,71.15




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 1089564
  custom_metrics: {}
  date: 2021-11-07_11-10-03
  done: false
  episode_len_mean: 74.16911764705883
  episode_media: {}
  episode_reward_max: 13.190000000000012
  episode_reward_mean: 7.14389705882354
  episode_reward_min: 0.5700000000000017
  episodes_this_iter: 136
  episodes_total: 14063
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.9331447803057158
          entropy_coeff: 0.01
          kl: 0.015617774330536046
          policy_loss: -0.06644396259743943
          total_loss: 0.10359196055791954
          vf_explained_var: 0.9740480780601501
          vf_loss: 0.16564787741846
    num_agent_steps_sampled: 1089564
    num_agent_steps_trained: 1089564
    num_steps_sampled: 1089564
    num_steps_trained: 1089564

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,109,47041.4,1089564,7.1439,13.19,0.57,74.1691




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 1099560
  custom_metrics: {}
  date: 2021-11-07_11-17-13
  done: false
  episode_len_mean: 81.1639344262295
  episode_media: {}
  episode_reward_max: 13.110000000000012
  episode_reward_mean: 6.7513934426229625
  episode_reward_min: 0.5799999999999987
  episodes_this_iter: 122
  episodes_total: 14185
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.9658964401636367
          entropy_coeff: 0.01
          kl: 0.017279322994223002
          policy_loss: -0.0678972048816295
          total_loss: 0.13049507704046037
          vf_explained_var: 0.962393045425415
          vf_loss: 0.19180827589753346
    num_agent_steps_sampled: 1099560
    num_agent_steps_trained: 1099560
    num_steps_sampled: 1099560
    num_steps_trained: 10995

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,110,47471.1,1099560,6.75139,13.11,0.58,81.1639




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 1109556
  custom_metrics: {}
  date: 2021-11-07_11-23-59
  done: false
  episode_len_mean: 75.90151515151516
  episode_media: {}
  episode_reward_max: 14.080000000000021
  episode_reward_mean: 6.404469696969708
  episode_reward_min: 0.5099999999999993
  episodes_this_iter: 132
  episodes_total: 14317
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.9881009569534889
          entropy_coeff: 0.01
          kl: 0.015739135069049757
          policy_loss: -0.06517239350777788
          total_loss: 0.14506495464911573
          vf_explained_var: 0.9581125378608704
          vf_loss: 0.20621454734832811
    num_agent_steps_sampled: 1109556
    num_agent_steps_trained: 1109556
    num_steps_sampled: 1109556
    num_steps_trained: 110

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,111,47876.7,1109556,6.40447,14.08,0.51,75.9015




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 1119552
  custom_metrics: {}
  date: 2021-11-07_11-32-19
  done: false
  episode_len_mean: 71.95652173913044
  episode_media: {}
  episode_reward_max: 13.080000000000014
  episode_reward_mean: 6.505869565217402
  episode_reward_min: 0.1499999999999991
  episodes_this_iter: 138
  episodes_total: 14455
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.9695377122642648
          entropy_coeff: 0.01
          kl: 0.01829005864330086
          policy_loss: -0.06133826453334246
          total_loss: 0.17706851073676066
          vf_explained_var: 0.962715744972229
          vf_loss: 0.23032412678321712
    num_agent_steps_sampled: 1119552
    num_agent_steps_trained: 1119552
    num_steps_sampled: 1119552
    num_steps_trained: 11195

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,112,48376.6,1119552,6.50587,13.08,0.15,71.9565




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 1129548
  custom_metrics: {}
  date: 2021-11-07_11-41-57
  done: false
  episode_len_mean: 74.34558823529412
  episode_media: {}
  episode_reward_max: 13.210000000000013
  episode_reward_mean: 7.326838235294128
  episode_reward_min: 0.6900000000000016
  episodes_this_iter: 136
  episodes_total: 14591
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.936995648828327
          entropy_coeff: 0.01
          kl: 0.016131521573817285
          policy_loss: -0.06120770022464104
          total_loss: 0.14329509602951157
          vf_explained_var: 0.9682201147079468
          vf_loss: 0.19937300519842624
    num_agent_steps_sampled: 1129548
    num_agent_steps_trained: 1129548
    num_steps_sampled: 1129548
    num_steps_trained: 1129

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,113,48954.9,1129548,7.32684,13.21,0.69,74.3456




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 1139544
  custom_metrics: {}
  date: 2021-11-07_11-49-49
  done: false
  episode_len_mean: 72.97777777777777
  episode_media: {}
  episode_reward_max: 15.12000000000001
  episode_reward_mean: 6.583407407407417
  episode_reward_min: 0.49000000000000143
  episodes_this_iter: 135
  episodes_total: 14726
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.9701714229379963
          entropy_coeff: 0.01
          kl: 0.01585199231720779
          policy_loss: -0.06275592167328438
          total_loss: 0.12993844379034117
          vf_explained_var: 0.9675049781799316
          vf_loss: 0.18832086626217406
    num_agent_steps_sampled: 1139544
    num_agent_steps_trained: 1139544
    num_steps_sampled: 1139544
    num_steps_trained: 1139

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,114,49426.4,1139544,6.58341,15.12,0.49,72.9778




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 1149540
  custom_metrics: {}
  date: 2021-11-07_11-58-34
  done: false
  episode_len_mean: 67.84353741496598
  episode_media: {}
  episode_reward_max: 13.740000000000023
  episode_reward_mean: 6.4917006802721176
  episode_reward_min: 0.38999999999999957
  episodes_this_iter: 147
  episodes_total: 14873
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.9518594371966826
          entropy_coeff: 0.01
          kl: 0.016423984274897004
          policy_loss: -0.06575353018557414
          total_loss: 0.12775462025569545
          vf_explained_var: 0.9670725464820862
          vf_loss: 0.18808281908496322
    num_agent_steps_sampled: 1149540
    num_agent_steps_trained: 1149540
    num_steps_sampled: 1149540
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,115,49952.2,1149540,6.4917,13.74,0.39,67.8435




Result for PPO_my_env_b3578_00000:
  agent_timesteps_total: 1159536
  custom_metrics: {}
  date: 2021-11-07_12-06-01
  done: false
  episode_len_mean: 72.9136690647482
  episode_media: {}
  episode_reward_max: 14.11000000000002
  episode_reward_mean: 6.785971223021594
  episode_reward_min: 0.7300000000000036
  episodes_this_iter: 139
  episodes_total: 15012
  experiment_id: 5f28cb8af40d4f61a6ef0db2576d618a
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000005
          cur_lr: 0.00010000000000000002
          entropy: 1.9446334323312482
          entropy_coeff: 0.01
          kl: 0.016493640813618225
          policy_loss: -0.06176512789649841
          total_loss: 0.1456288130619587
          vf_explained_var: 0.9668768048286438
          vf_loss: 0.20179056025102224
    num_agent_steps_sampled: 1159536
    num_agent_steps_trained: 1159536
    num_steps_sampled: 1159536
    num_steps_trained: 115953

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_b3578_00000,RUNNING,192.168.3.5:492355,116,50399.2,1159536,6.78597,14.11,0.73,72.9137


Process _WandbLoggingProcess-1:
Traceback (most recent call last):
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/tune/integration/wandb.py", line 200, in run
    result = self.queue.get()
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/queues.py", line 94, in get
    res = self._recv_bytes()
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/root/miniconda/en

Traceback (most recent call last):
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ipython-7.25.0-py3.7.egg/IPython/core/interactiveshell.py", line 3441, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_492241/2977992906.py", line 34, in <module>
    checkpoint_at_end=True)
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/tune/tune.py", line 532, in run
    runner.step()
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/tune/trial_runner.py", line 554, in step
    self._process_events(timeout=timeout)  # blocking
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/tune/trial_runner.py", line 675, in _process_events
    timeout=timeout)  # blocking
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/tune/ray_trial_executor.py", line 718, in get_next_available_trial
    ready, _ = ray.wait(shuffled_results, timeout=timeout)
  File "/root/miniconda/envs/py37/lib/python3.7/sit

TypeError: object of type 'NoneType' has no len()