In [1]:
import torch 
from torch import nn

import ray
from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

#from models import VisualEncoder
from train import *
from wrappers_2 import *



In [2]:
class VisualEncoder(nn.Module):
    """Convolutional encoder that flattens an image into a feature vector.

    The network is six ``Conv2d(kernel_size=2, stride=2, padding=0)`` layers,
    each followed by ``ELU``, and a final ``Flatten``. Every conv halves the
    spatial resolution, so a ``(batch, 3, 64, 64)`` input ends up as
    ``(batch, 512)`` (512 channels at 1x1).
    """

    # Channels entering the first conv, then leaving each successive conv.
    _CHANNELS = (3, 32, 32, 64, 128, 256, 512)

    def __init__(self):
        super().__init__()

        stages = []
        for in_ch, out_ch in zip(self._CHANNELS[:-1], self._CHANNELS[1:]):
            stages.append(nn.Conv2d(in_ch, out_ch, kernel_size=2, stride=2, padding=0))
            stages.append(nn.ELU())
        stages.append(nn.Flatten())

        # The positional indices inside this Sequential are part of the
        # state_dict keys ("cnn.0.weight", "cnn.2.weight", ...), so the layer
        # order must not change.
        self.cnn = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the conv stack and return the flattened features."""
        features = self.cnn(x)
        return features

In [3]:
class TransformerEncoderLayer(nn.Module):
    """Multi-head attention followed by a two-layer feed-forward path.

    Both results are added residually onto ``query``. There is no layer
    normalisation and attention dropout is fixed at 0.0. Inputs are
    batch-first: ``(batch, tokens, d_model)``.

    Args:
        d_model: Embedding width of query/key/value tokens.
        num_heads: Number of attention heads.
        dim_feedforward: Hidden width of the feed-forward path; defaults to
            ``4 * d_model`` when ``None``.
        activation: Zero-argument factory for the feed-forward non-linearity.
    """

    def __init__(self, d_model, num_heads, dim_feedforward=None, activation=nn.ELU):
        super().__init__()
        hidden_dim = 4 * d_model if dim_feedforward is None else dim_feedforward
        self.self_attn = nn.MultiheadAttention(d_model, num_heads, dropout=0.0, batch_first=True)
        # Feed-forward path: d_model -> hidden_dim -> d_model.
        self.linear1 = nn.Linear(d_model, hidden_dim)
        self.linear2 = nn.Linear(hidden_dim, d_model)
        self.activation = activation()

    def forward(self, query, key, value):
        """Return ``query + attn + ffn(attn)`` where ``attn`` attends query over key/value."""
        attended, _ = self.self_attn(query=query, key=key, value=value)
        # NOTE(review): the feed-forward path is fed the raw attention output,
        # not the residual sum (query + attended) as in the usual Transformer
        # layout. Kept as-is because changing it would alter trained weights'
        # behaviour -- confirm whether this is intentional.
        transformed = self.linear2(self.activation(self.linear1(attended)))
        return query + attended + transformed

In [4]:
class FusionNet(nn.Module):
    """Fuse a 3-D target-grid feature volume with a flat image embedding.

    The image embedding is first widened by ``img_preproc`` (512 -> 2048).
    Four stages follow. In stage ``k`` the channel width is
    ``d_model * 2**(k-1)`` and:

    * the target volume is flattened to one token per voxel and attends over
      the image tokens (``target_cross_attn_k``);
    * the image vector is cut into tokens of the same width and attends over
      the voxel tokens (``img_cross_attn_k``);
    * the attended target tokens are folded back into a volume and passed
      through ``conv_k`` (3x3x3, no padding, doubles the channels) and
      ``act_k``.

    After the last stage the volume is max-pooled with kernel ``(1, 3, 3)``
    and flattened.

    Submodule attribute names are kept stable because they are the
    state_dict keys of saved checkpoints.

    Args:
        d_model: Channel width of the incoming target volume.
        num_heads: Attention heads used in every cross-attention layer.

    Raises:
        ValueError: If the widened image embedding (2048 values) cannot be
            split evenly into tokens of the widest stage (``8 * d_model``).
    """

    # Number of (cross-attention, Conv3d) stages; each doubles the channel width.
    _NUM_STAGES = 4

    def __init__(self, d_model=8, num_heads=1):
        super().__init__()
        self.img_preproc = nn.Sequential(
            nn.Linear(512, 2048),
            nn.ELU(),
        )

        img_width = self.img_preproc[0].out_features
        widest = d_model * 2 ** (self._NUM_STAGES - 1)
        # Every stage width divides the widest one, so one check covers all stages.
        if widest <= 0 or img_width % widest != 0:
            raise ValueError(
                f"d_model={d_model} is incompatible with the {img_width}-wide image "
                f"embedding: {img_width} must be divisible by {widest}"
            )

        # Registration order (target attn, image attn, conv, activation per
        # stage) matches the original hand-written layout.
        for stage in range(1, self._NUM_STAGES + 1):
            width = d_model * 2 ** (stage - 1)
            setattr(self, f"target_cross_attn_{stage}",
                    TransformerEncoderLayer(d_model=width, num_heads=num_heads))
            setattr(self, f"img_cross_attn_{stage}",
                    TransformerEncoderLayer(d_model=width, num_heads=num_heads))
            setattr(self, f"conv_{stage}",
                    nn.Conv3d(width, 2 * width, kernel_size=3, stride=1))
            setattr(self, f"act_{stage}", nn.ELU())

        self.max_pool = nn.MaxPool3d(kernel_size=(1, 3, 3))

    def forward(self, target, img_features):
        """Return the fused feature vector.

        Args:
            target: Tensor of shape ``(batch, d_model, D, H, W)``. Each spatial
                size shrinks by 2 per stage, so every one must be at least 9.
            img_features: Tensor whose last dimension is 512 (input width of
                ``img_preproc``).

        Returns:
            Tensor of shape ``(batch, F)``. With the defaults and a 9x11x11
            grid the last stage yields ``(batch, 128, 1, 3, 3)``, which the
            pool reduces to ``F == 128``.
        """
        batch_size = target.shape[0]
        img_width = self.img_preproc[0].out_features

        img = self.img_preproc(img_features)

        for stage in range(1, self._NUM_STAGES + 1):
            # Read the current layout from the tensor instead of hard-coding it,
            # so non-default d_model values and grid sizes work.
            channels = target.shape[1]
            depth, height, width = target.shape[2:]

            # One token per voxel: (batch, D*H*W, channels).
            target_tokens = target.permute(0, 2, 3, 4, 1).reshape(
                batch_size, depth * height * width, channels
            )
            # Cut the flat image vector into tokens of matching width.
            img_tokens = img.reshape(batch_size, img_width // channels, channels)

            target_attn = getattr(self, f"target_cross_attn_{stage}")
            attended = target_attn(query=target_tokens, key=img_tokens, value=img_tokens)
            attended = attended.reshape(
                batch_size, depth, height, width, channels
            ).permute(0, 4, 1, 2, 3)

            if stage < self._NUM_STAGES:
                # The image branch only feeds the next stage, so it is skipped in
                # the final stage, where its result was previously computed and
                # discarded. img_cross_attn_4 is still constructed so existing
                # checkpoints keep loading.
                img_attn = getattr(self, f"img_cross_attn_{stage}")
                img = img_attn(
                    query=img_tokens, key=target_tokens, value=target_tokens
                ).reshape(batch_size, img_width)

            conv = getattr(self, f"conv_{stage}")
            act = getattr(self, f"act_{stage}")
            target = act(conv(attended))

        pooled = self.max_pool(target)
        return pooled.reshape(batch_size, -1)

In [5]:
from torch.nn.functional import one_hot

class MyModelClass(TorchModelV2, nn.Module):
    """RLlib Torch model combining a pretrained image encoder with the target grid.

    The POV frame goes through ``VisualEncoder`` (weights loaded from disk and
    evaluated under ``torch.no_grad()``, so it receives no gradients). The
    target grid is one-hot encoded into 7 classes and lifted to 8 channels by
    a 1x1x1 ``Conv3d``. ``FusionNet`` merges both into a feature vector that
    feeds a linear action head and a linear value head.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)
        self.visual_encoder = VisualEncoder()
        self.visual_encoder.load_state_dict(
            torch.load("/IGLU-Minecraft/models/AngelaCNN/encoder_weigths.pth", map_location=torch.device('cpu'))
        )
        self.target_encoder = nn.Sequential(
            nn.Conv3d(7, 8, kernel_size=1, stride=1, padding=0),
            nn.ELU(),
        )
        # Must equal the width of FusionNet's output vector.
        policy_hidden_dim = 128
        self.policy_network = FusionNet()

        self.action_head = nn.Linear(policy_hidden_dim, action_space.n)
        self.value_head = nn.Linear(policy_hidden_dim, 1)
        # Value estimate of the most recent forward() call, read by value_function().
        self.last_value = None

        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            self.visual_encoder.cuda()
            self.target_encoder.cuda()
            self.policy_network.cuda()
            self.action_head.cuda()
            self.value_head.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Compute action logits and cache the value estimate.

        Reads ``input_dict['obs']['pov']`` and ``input_dict['obs']['target_grid']``.
        ``state`` is returned unchanged; ``seq_lens`` is not used.

        Returns:
            ``(logits, state)`` where ``logits`` has ``action_space.n`` columns.
        """
        obs = input_dict['obs']
        # Last axis of pov is moved to position 1 -- assumes pov arrives as
        # (batch, H, W, C); TODO confirm against the observation wrapper.
        pov = obs['pov'].permute(0, 3, 1, 2).float() / 255.0
        # One-hot class axis is appended last, then moved to the channel position.
        target = one_hot(obs['target_grid'].long(), num_classes=7).permute(0, 4, 1, 2, 3).float()

        # Tensor.cuda() returns a copy rather than moving in place, so the
        # previous bare `pov.cuda()` / `target.cuda()` calls had no effect.
        # Move the inputs to wherever the model's parameters actually live,
        # which also stays correct if the model is later moved off the GPU.
        device = self.action_head.weight.device
        pov = pov.to(device)
        target = target.to(device)

        # The pretrained encoder is used as a fixed feature extractor.
        with torch.no_grad():
            visual_features = self.visual_encoder(pov)

        target_features = self.target_encoder(target)

        features = self.policy_network(target_features, visual_features)

        action = self.action_head(features)
        self.last_value = self.value_head(features).squeeze(1)
        return action, state

    @override(TorchModelV2)
    def value_function(self):
        """Return the value estimate cached by the latest ``forward()`` call."""
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

In [6]:
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [7]:
class VisualObservationWrapper(ObsWrapper):
    """Observation wrapper exposing pov, inventory, compass and optionally the target grid.

    Args:
        env: Environment to wrap.
        include_target: When true, ``'target_grid'`` (shape 9x11x11, values
            0..6) is added to the observation space and to every observation.
    """

    def __init__(self, env, include_target=False):
        super().__init__(env)
        spaces = {
            'pov': gym.spaces.Box(low=0, high=255, shape=(64, 64, 3)),
            'inventory': gym.spaces.Box(low=0.0, high=20.0, shape=(6,)),
            'compass': gym.spaces.Box(low=-180.0, high=180.0, shape=(1,))
        }
        if include_target:
            spaces['target_grid'] = \
                gym.spaces.Box(low=0, high=6, shape=(9, 11, 11))
        self.observation_space = gym.spaces.Dict(spaces)

    def observation(self, obs, reward=None, done=None, info=None):
        """Convert a raw observation into the dict declared by ``observation_space``.

        The target grid is taken from ``info['target_grid']`` when present
        (and removed from ``info``); otherwise it falls back to the current
        task's grid. If ``info`` was supplied without the key, an error is
        logged and the env is flagged via ``should_reset(True)`` when it
        offers that method.
        """
        current_task_grid = None
        if info is not None and 'target_grid' in info:
            target_grid = info['target_grid']
            del info['target_grid']
        else:
            if info is not None:
                logger.error(f'info: {info}')
                if hasattr(self.unwrapped, 'should_reset'):
                    self.unwrapped.should_reset(True)
            current_task_grid = self.env.unwrapped.tasks.current.target_grid
            target_grid = current_task_grid

        converted = {
            'pov': obs['pov'].astype(np.float32),
            'inventory': obs['inventory'],
            'compass': np.array([obs['compass']['angle'].item()]),
        }
        # Only emit the grid when the space declares it; previously it was always
        # returned, so observations did not match observation_space when the
        # wrapper was built with include_target=False.
        if 'target_grid' in self.observation_space.spaces:
            converted['target_grid'] = target_grid
        return converted

In [8]:
import os
# Enumerate GPUs in PCI bus order and expose only device 0 (see issue #152).
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
})

# Task ids C1..C155, with C38 left out.
tasks = [f'C{task_no}' for task_no in range(1, 156) if task_no != 38]
    
class RewardWrapper(gym.RewardWrapper):
    """Reshape the env reward: small step penalty, unit rewards scaled down.

    * a reward of exactly 0 becomes -0.01;
    * a reward of magnitude exactly 1 is divided by 10;
    * anything else passes through unchanged.
    """

    def __init__(self, env):
        super().__init__(env)

    def reward(self, rew):
        """Return the reshaped reward for a single step."""
        if rew == 0:
            # Penalise steps that earned nothing.
            return -0.01
        if abs(rew) == 1:
            return rew / 10
        return rew
    
def env_creator(env_config):
    """Build the IGLU builder environment with the full wrapper stack.

    ``env_config`` is part of the creator signature but is not read here.
    The task set is restricted to the module-level ``tasks`` list.
    """
    base_env = gym.make('IGLUSilentBuilder-v0', max_steps=250)
    base_env.update_taskset(TaskSet(preset=tasks))

    # Wrappers are applied innermost first; the reward shaping is outermost.
    with_obs = VisualObservationWrapper(base_env, include_target=True)
    with_placement = SelectAndPlace(with_obs)
    discretized = Discretization(with_placement, flat_action_space('human-level'))
    return RewardWrapper(discretized)

from ray.tune.registry import register_env
register_env("my_env", env_creator)

from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

In [None]:
from ray.tune.integration.wandb import WandbLogger

# Custom model section: select the model registered as "my_torch_model".
model_settings = {
    # Specify our custom model from above.
    "custom_model": "my_torch_model",
    # Extra kwargs to be passed to your model's c'tor.
    "custom_model_config": {},
}

# Weights & Biases run metadata consumed by WandbLogger.
wandb_settings = {
    "project": "IGLU-Minecraft",
    "name": "PPO All Tasks pretrained (visual pretrained AngelaCNN + CrossAttn) (3 noops after placement) r: -0.01 div10",
}

# PPO hyper-parameters; "lr" and "gamma" are left at their defaults
# (previously tried: "lr": 1e-4, "gamma": 0.99).
ppo_config = {
    "env": "my_env",
    "framework": "torch",
    "num_gpus": 1,
    "num_workers": 3,
    "sgd_minibatch_size": 128,
    "clip_param": 0.2,
    "entropy_coeff": 0.01,
    "lambda": 0.95,
    "train_batch_size": 5_000,
    "model": model_settings,
    "logger_config": {"wandb": wandb_settings},
}

# Launch training: checkpoint every 5 iterations and at the end, keep 50.
analysis = tune.run(
    PPOTrainer,
    config=ppo_config,
    loggers=[WandbLogger],
    local_dir="/IGLU-Minecraft/checkpoints/all_tasks_cross_attn",
    keep_checkpoints_num=50,
    checkpoint_freq=5,
    checkpoint_at_end=True,
)

2021-11-14 14:38:42,674	INFO wandb.py:170 -- Already logged into W&B.
2021-11-14 14:38:42,686	ERROR syncer.py:72 -- Log sync requires rsync to be installed.


Trial name,status,loc
PPO_my_env_907c1_00000,RUNNING,


[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.6 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2m[36m(pid=154354)[0m 2021-11-14 14:38:46,158	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=154354)[0m 2021-11-14 14:38:46,158	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=154354)[0m 2021-11-14 14:38:54,731	INFO trainable.py:109 -- Trainable.setup took 11.081 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 9996
  custom_metrics: {}
  date: 2021-11-14_14-45-00
  done: false
  episode_len_mean: 99.03030303030303
  episode_media: {}
  episode_reward_max: 4.700000000000003
  episode_reward_mean: -0.5804040404040409
  episode_reward_min: -1.450000000000001
  episodes_this_iter: 99
  episodes_total: 99
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.8841074453459847
          entropy_coeff: 0.01
          kl: 0.004704763754277773
          policy_loss: -0.011509279726853228
          total_loss: 0.040411061462030835
          vf_explained_var: -0.3121291697025299
          vf_loss: 0.07982046209319503
    num_agent_steps_sampled: 9996
    num_agent_steps_trained: 9996
    num_steps_sampled: 9996
    num_steps_trained: 9996
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,1,366.176,9996,-0.580404,4.7,-1.45,99.0303


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 19992
  custom_metrics: {}
  date: 2021-11-14_14-49-26
  done: false
  episode_len_mean: 100.59
  episode_media: {}
  episode_reward_max: 4.930000000000001
  episode_reward_mean: -0.7361000000000005
  episode_reward_min: -1.6000000000000008
  episodes_this_iter: 100
  episodes_total: 199
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.872266720400916
          entropy_coeff: 0.01
          kl: 0.007915726438018678
          policy_loss: -0.01592555729767833
          total_loss: 0.020403559366241098
          vf_explained_var: -0.07412627339363098
          vf_loss: 0.06426021011280589
    num_agent_steps_sampled: 19992
    num_agent_steps_trained: 19992
    num_steps_sampled: 19992
    num_steps_trained: 19992
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,2,631.563,19992,-0.7361,4.93,-1.6,100.59


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 29988
  custom_metrics: {}
  date: 2021-11-14_14-53-56
  done: false
  episode_len_mean: 100.37
  episode_media: {}
  episode_reward_max: 4.730000000000006
  episode_reward_mean: 0.15750000000000042
  episode_reward_min: -1.730000000000001
  episodes_this_iter: 99
  episodes_total: 298
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.839373106528551
          entropy_coeff: 0.01
          kl: 0.010519051259605366
          policy_loss: -0.021232346243137478
          total_loss: 0.18197832048749232
          vf_explained_var: 0.0877896323800087
          vf_loss: 0.23055249153393614
    num_agent_steps_sampled: 29988
    num_agent_steps_trained: 29988
    num_steps_sampled: 29988
    num_steps_trained: 29988
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,3,901.434,29988,0.1575,4.73,-1.73,100.37




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 39984
  custom_metrics: {}
  date: 2021-11-14_14-58-51
  done: false
  episode_len_mean: 97.14563106796116
  episode_media: {}
  episode_reward_max: 4.710000000000005
  episode_reward_mean: 0.5757281553398065
  episode_reward_min: -1.8300000000000007
  episodes_this_iter: 103
  episodes_total: 401
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.811861156194638
          entropy_coeff: 0.01
          kl: 0.011586275796279816
          policy_loss: -0.02427993158284479
          total_loss: 0.2371059584005489
          vf_explained_var: 0.19990791380405426
          vf_loss: 0.28834587312820886
    num_agent_steps_sampled: 39984
    num_agent_steps_trained: 39984
    num_steps_sampled: 39984
    num_steps_trained: 39984
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,4,1196.43,39984,0.575728,4.71,-1.83,97.1456


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 49980
  custom_metrics: {}
  date: 2021-11-14_15-03-31
  done: false
  episode_len_mean: 97.65686274509804
  episode_media: {}
  episode_reward_max: 8.530000000000014
  episode_reward_mean: 1.14294117647059
  episode_reward_min: -1.800000000000001
  episodes_this_iter: 102
  episodes_total: 503
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.7914847830421903
          entropy_coeff: 0.01
          kl: 0.01291240033164486
          policy_loss: -0.02431462747721463
          total_loss: 0.38614972693065547
          vf_explained_var: 0.2639394998550415
          vf_loss: 0.437087961875348
    num_agent_steps_sampled: 49980
    num_agent_steps_trained: 49980
    num_steps_sampled: 49980
    num_steps_trained: 49980
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,5,1476.74,49980,1.14294,8.53,-1.8,97.6569


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 59976
  custom_metrics: {}
  date: 2021-11-14_15-08-10
  done: false
  episode_len_mean: 99.87128712871286
  episode_media: {}
  episode_reward_max: 10.260000000000012
  episode_reward_mean: 1.2247524752475272
  episode_reward_min: -1.9499999999999995
  episodes_this_iter: 101
  episodes_total: 604
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.7712131653076564
          entropy_coeff: 0.01
          kl: 0.012632949608454996
          policy_loss: -0.02826809365159044
          total_loss: 0.3065449736855176
          vf_explained_var: 0.37885403633117676
          vf_loss: 0.36126190269541025
    num_agent_steps_sampled: 59976
    num_agent_steps_trained: 59976
    num_steps_sampled: 59976
    num_steps_trained: 59976
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,6,1755.41,59976,1.22475,10.26,-1.95,99.8713




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 69972
  custom_metrics: {}
  date: 2021-11-14_15-13-05
  done: false
  episode_len_mean: 100.58
  episode_media: {}
  episode_reward_max: 6.530000000000015
  episode_reward_mean: 1.2506000000000028
  episode_reward_min: -1.9600000000000009
  episodes_this_iter: 99
  episodes_total: 703
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.747085771805201
          entropy_coeff: 0.01
          kl: 0.019294186270850836
          policy_loss: -0.034431641933341056
          total_loss: 0.2905691921695048
          vf_explained_var: 0.4129098951816559
          vf_loss: 0.35054227306117486
    num_agent_steps_sampled: 69972
    num_agent_steps_trained: 69972
    num_steps_sampled: 69972
    num_steps_trained: 69972
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,7,2050.6,69972,1.2506,6.53,-1.96,100.58




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 79968
  custom_metrics: {}
  date: 2021-11-14_15-18-05
  done: false
  episode_len_mean: 99.54455445544555
  episode_media: {}
  episode_reward_max: 4.820000000000013
  episode_reward_mean: 0.9281188118811902
  episode_reward_min: -1.9200000000000013
  episodes_this_iter: 101
  episodes_total: 804
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.716490948302114
          entropy_coeff: 0.01
          kl: 0.0277014662107412
          policy_loss: -0.038532258618352376
          total_loss: 0.23172735154517313
          vf_explained_var: 0.5254629254341125
          vf_loss: 0.2946543720467255
    num_agent_steps_sampled: 79968
    num_agent_steps_trained: 79968
    num_steps_sampled: 79968
    num_steps_trained: 79968
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,8,2350.99,79968,0.928119,4.82,-1.92,99.5446


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 89964
  custom_metrics: {}
  date: 2021-11-14_15-22-46
  done: false
  episode_len_mean: 101.49
  episode_media: {}
  episode_reward_max: 6.870000000000013
  episode_reward_mean: 0.8922000000000024
  episode_reward_min: -2.05
  episodes_this_iter: 98
  episodes_total: 902
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15
          cur_lr: 5.000000000000001e-05
          entropy: 2.6958542383634128
          entropy_coeff: 0.01
          kl: 0.029198183727480908
          policy_loss: -0.03811441195261084
          total_loss: 0.21304377028439989
          vf_explained_var: 0.549401044845581
          vf_loss: 0.2737369965594739
    num_agent_steps_sampled: 89964
    num_agent_steps_trained: 89964
    num_steps_sampled: 89964
    num_steps_trained: 89964
  iterations_since_restore: 9
  node_ip: 192.168.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,9,2631.39,89964,0.8922,6.87,-2.05,101.49


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 99960
  custom_metrics: {}
  date: 2021-11-14_15-27-31
  done: false
  episode_len_mean: 98.76470588235294
  episode_media: {}
  episode_reward_max: 8.750000000000012
  episode_reward_mean: 1.419803921568631
  episode_reward_min: -2.000000000000001
  episodes_this_iter: 102
  episodes_total: 1004
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.22500000000000006
          cur_lr: 5.000000000000001e-05
          entropy: 2.6617645257558578
          entropy_coeff: 0.01
          kl: 0.03276358538475574
          policy_loss: -0.044040175498678134
          total_loss: 0.23528799822327132
          vf_explained_var: 0.5880103707313538
          vf_loss: 0.2985740117099868
    num_agent_steps_sampled: 99960
    num_agent_steps_trained: 99960
    num_steps_sampled: 99960
    num_steps_trained: 99960
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,10,2916.89,99960,1.4198,8.75,-2,98.7647




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 109956
  custom_metrics: {}
  date: 2021-11-14_15-32-35
  done: false
  episode_len_mean: 95.24038461538461
  episode_media: {}
  episode_reward_max: 8.360000000000014
  episode_reward_mean: 1.7176923076923116
  episode_reward_min: -1.6100000000000005
  episodes_this_iter: 104
  episodes_total: 1108
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.33749999999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.6492050871889816
          entropy_coeff: 0.01
          kl: 0.029999021829840426
          policy_loss: -0.04716765971070267
          total_loss: 0.20583039400322983
          vf_explained_var: 0.6615533828735352
          vf_loss: 0.2693654337563576
    num_agent_steps_sampled: 109956
    num_agent_steps_trained: 109956
    num_steps_sampled: 109956
    num_steps_trained: 109956
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,11,3220.9,109956,1.71769,8.36,-1.61,95.2404


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 119952
  custom_metrics: {}
  date: 2021-11-14_15-37-24
  done: false
  episode_len_mean: 96.23076923076923
  episode_media: {}
  episode_reward_max: 8.760000000000005
  episode_reward_mean: 1.5182692307692345
  episode_reward_min: -2.0200000000000005
  episodes_this_iter: 104
  episodes_total: 1212
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.50625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6219841663654035
          entropy_coeff: 0.01
          kl: 0.028015319427843768
          policy_loss: -0.046292343868826254
          total_loss: 0.19925196828304703
          vf_explained_var: 0.6058682203292847
          vf_loss: 0.2575813973823992
    num_agent_steps_sampled: 119952
    num_agent_steps_trained: 119952
    num_steps_sampled: 119952
    num_steps_trained: 119952
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,12,3509.05,119952,1.51827,8.76,-2.02,96.2308


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 129948
  custom_metrics: {}
  date: 2021-11-14_15-42-11
  done: false
  episode_len_mean: 96.58653846153847
  episode_media: {}
  episode_reward_max: 10.190000000000015
  episode_reward_mean: 1.6795192307692342
  episode_reward_min: -2.1100000000000003
  episodes_this_iter: 104
  episodes_total: 1316
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7593750000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.6332171709109575
          entropy_coeff: 0.01
          kl: 0.024243443993249278
          policy_loss: -0.050442353914627154
          total_loss: 0.24711239260569628
          vf_explained_var: 0.5755695104598999
          vf_loss: 0.3054770540095802
    num_agent_steps_sampled: 129948
    num_agent_steps_trained: 129948
    num_steps_sampled: 129948
    num_steps_trained: 129948


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,13,3796.48,129948,1.67952,10.19,-2.11,96.5865




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 139944
  custom_metrics: {}
  date: 2021-11-14_15-47-16
  done: false
  episode_len_mean: 95.20192307692308
  episode_media: {}
  episode_reward_max: 6.870000000000012
  episode_reward_mean: 1.2180769230769264
  episode_reward_min: -1.8100000000000007
  episodes_this_iter: 104
  episodes_total: 1420
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1390624999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.614535803468818
          entropy_coeff: 0.01
          kl: 0.02094004718509212
          policy_loss: -0.049892968075891206
          total_loss: 0.17042035843667566
          vf_explained_var: 0.6678788661956787
          vf_loss: 0.2226066606091415
    num_agent_steps_sampled: 139944
    num_agent_steps_trained: 139944
    num_steps_sampled: 139944
    num_steps_trained: 139944
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,14,4101.07,139944,1.21808,6.87,-1.81,95.2019




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 149940
  custom_metrics: {}
  date: 2021-11-14_15-52-12
  done: false
  episode_len_mean: 95.82692307692308
  episode_media: {}
  episode_reward_max: 8.470000000000017
  episode_reward_mean: 1.651826923076927
  episode_reward_min: -1.7800000000000007
  episodes_this_iter: 104
  episodes_total: 1524
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7085937500000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.5801324088349302
          entropy_coeff: 0.01
          kl: 0.019684808400661603
          policy_loss: -0.04797709975670227
          total_loss: 0.19475129989958886
          vf_explained_var: 0.7160896062850952
          vf_loss: 0.23489638360647055
    num_agent_steps_sampled: 149940
    num_agent_steps_trained: 149940
    num_steps_sampled: 149940
    num_steps_trained: 149940
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,15,4397.38,149940,1.65183,8.47,-1.78,95.8269


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 159936
  custom_metrics: {}
  date: 2021-11-14_15-56-54
  done: false
  episode_len_mean: 96.11428571428571
  episode_media: {}
  episode_reward_max: 8.81000000000001
  episode_reward_mean: 1.754476190476195
  episode_reward_min: -2.289999999999996
  episodes_this_iter: 105
  episodes_total: 1629
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7085937500000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.5887092774749823
          entropy_coeff: 0.01
          kl: 0.01763131987004727
          policy_loss: -0.05092837934055899
          total_loss: 0.16808417862058322
          vf_explained_var: 0.7048277258872986
          vf_loss: 0.21477488728088892
    num_agent_steps_sampled: 159936
    num_agent_steps_trained: 159936
    num_steps_sampled: 159936
    num_steps_trained: 159936
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,16,4679.75,159936,1.75448,8.81,-2.29,96.1143


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 169932
  custom_metrics: {}
  date: 2021-11-14_16-01-37
  done: false
  episode_len_mean: 96.94174757281553
  episode_media: {}
  episode_reward_max: 8.64000000000002
  episode_reward_mean: 1.5957281553398093
  episode_reward_min: -1.9100000000000013
  episodes_this_iter: 103
  episodes_total: 1732
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7085937500000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.600336382429824
          entropy_coeff: 0.01
          kl: 0.015568854749177555
          policy_loss: -0.056665287115491736
          total_loss: 0.12470793101486838
          vf_explained_var: 0.7119825482368469
          vf_loss: 0.18077573356552956
    num_agent_steps_sampled: 169932
    num_agent_steps_trained: 169932
    num_steps_sampled: 169932
    num_steps_trained: 169932
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,17,4962.59,169932,1.59573,8.64,-1.91,96.9417




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 179928
  custom_metrics: {}
  date: 2021-11-14_16-06-39
  done: false
  episode_len_mean: 92.03669724770643
  episode_media: {}
  episode_reward_max: 9.740000000000014
  episode_reward_mean: 1.929816513761472
  episode_reward_min: -1.7500000000000009
  episodes_this_iter: 109
  episodes_total: 1841
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7085937500000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.5967230091747058
          entropy_coeff: 0.01
          kl: 0.021757631094439362
          policy_loss: -0.045536717239958356
          total_loss: 0.2753134146675022
          vf_explained_var: 0.6728546023368835
          vf_loss: 0.30964240723838793
    num_agent_steps_sampled: 179928
    num_agent_steps_trained: 179928
    num_steps_sampled: 179928
    num_steps_trained: 179928
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,18,5264.19,179928,1.92982,9.74,-1.75,92.0367




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 189924
  custom_metrics: {}
  date: 2021-11-14_16-11-47
  done: false
  episode_len_mean: 95.28571428571429
  episode_media: {}
  episode_reward_max: 8.680000000000007
  episode_reward_mean: 1.5261904761904797
  episode_reward_min: -2.0900000000000007
  episodes_this_iter: 105
  episodes_total: 1946
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5923866663223656
          entropy_coeff: 0.01
          kl: 0.014990808165429826
          policy_loss: -0.05124915889424519
          total_loss: 0.17733568799896882
          vf_explained_var: 0.725115954875946
          vf_loss: 0.21608891286489226
    num_agent_steps_sampled: 189924
    num_agent_steps_trained: 189924
    num_steps_sampled: 189924
    num_steps_trained: 189924
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,19,5572.06,189924,1.52619,8.68,-2.09,95.2857


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 199920
  custom_metrics: {}
  date: 2021-11-14_16-16-30
  done: false
  episode_len_mean: 96.14423076923077
  episode_media: {}
  episode_reward_max: 8.810000000000013
  episode_reward_mean: 1.977019230769235
  episode_reward_min: -1.660000000000001
  episodes_this_iter: 104
  episodes_total: 2050
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5920385914990027
          entropy_coeff: 0.01
          kl: 0.014553636918623747
          policy_loss: -0.050832802840731404
          total_loss: 0.1898929063636714
          vf_explained_var: 0.6800243258476257
          vf_loss: 0.22934671548975266
    num_agent_steps_sampled: 199920
    num_agent_steps_trained: 199920
    num_steps_sampled: 199920
    num_steps_trained: 199920
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,20,5854.99,199920,1.97702,8.81,-1.66,96.1442




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 209916
  custom_metrics: {}
  date: 2021-11-14_16-21-29
  done: false
  episode_len_mean: 95.3076923076923
  episode_media: {}
  episode_reward_max: 12.690000000000014
  episode_reward_mean: 1.4474038461538492
  episode_reward_min: -2.229999999999996
  episodes_this_iter: 104
  episodes_total: 2154
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.573831805612287
          entropy_coeff: 0.01
          kl: 0.01478891218465308
          policy_loss: -0.054327342605106854
          total_loss: 0.14469867380073245
          vf_explained_var: 0.7236766219139099
          vf_loss: 0.18686197163720225
    num_agent_steps_sampled: 209916
    num_agent_steps_trained: 209916
    num_steps_sampled: 209916
    num_steps_trained: 209916
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,21,6154.5,209916,1.4474,12.69,-2.23,95.3077




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 219912
  custom_metrics: {}
  date: 2021-11-14_16-26-28
  done: false
  episode_len_mean: 94.97169811320755
  episode_media: {}
  episode_reward_max: 8.540000000000015
  episode_reward_mean: 1.9624528301886834
  episode_reward_min: -1.9100000000000008
  episodes_this_iter: 106
  episodes_total: 2260
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.577983933738154
          entropy_coeff: 0.01
          kl: 0.016482307783089026
          policy_loss: -0.05310742948920681
          total_loss: 0.20362163194630326
          vf_explained_var: 0.7068442702293396
          vf_loss: 0.24026654987699456
    num_agent_steps_sampled: 219912
    num_agent_steps_trained: 219912
    num_steps_sampled: 219912
    num_steps_trained: 219912
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,22,6453.21,219912,1.96245,8.54,-1.91,94.9717


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 229908
  custom_metrics: {}
  date: 2021-11-14_16-31-12
  done: false
  episode_len_mean: 94.78095238095239
  episode_media: {}
  episode_reward_max: 8.540000000000017
  episode_reward_mean: 1.8037142857142896
  episode_reward_min: -2.0599999999999996
  episodes_this_iter: 105
  episodes_total: 2365
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5670608746699797
          entropy_coeff: 0.01
          kl: 0.01626357898875941
          policy_loss: -0.052882363696972655
          total_loss: 0.20770867521452727
          vf_explained_var: 0.7249283194541931
          vf_loss: 0.24457987422664834
    num_agent_steps_sampled: 229908
    num_agent_steps_trained: 229908
    num_steps_sampled: 229908
    num_steps_trained: 229908
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,23,6737.41,229908,1.80371,8.54,-2.06,94.781


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 239904
  custom_metrics: {}
  date: 2021-11-14_16-35-57
  done: false
  episode_len_mean: 94.12264150943396
  episode_media: {}
  episode_reward_max: 12.420000000000016
  episode_reward_mean: 1.6654716981132118
  episode_reward_min: -2.0200000000000005
  episodes_this_iter: 106
  episodes_total: 2471
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5606892810927495
          entropy_coeff: 0.01
          kl: 0.015767214697755196
          policy_loss: -0.05150007929127568
          total_loss: 0.16309441970163782
          vf_explained_var: 0.7561711072921753
          vf_loss: 0.19979174573722686
    num_agent_steps_sampled: 239904
    num_agent_steps_trained: 239904
    num_steps_sampled: 239904
    num_steps_trained: 239904
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,24,7022.31,239904,1.66547,12.42,-2.02,94.1226




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 249900
  custom_metrics: {}
  date: 2021-11-14_16-41-11
  done: false
  episode_len_mean: 90.7909090909091
  episode_media: {}
  episode_reward_max: 10.810000000000011
  episode_reward_mean: 1.8768181818181864
  episode_reward_min: -2.229999999999999
  episodes_this_iter: 110
  episodes_total: 2581
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.584720910920037
          entropy_coeff: 0.01
          kl: 0.015708214264731103
          policy_loss: -0.05371588069834134
          total_loss: 0.19130616118737426
          vf_explained_var: 0.7147061228752136
          vf_loss: 0.23061081553674023
    num_agent_steps_sampled: 249900
    num_agent_steps_trained: 249900
    num_steps_sampled: 249900
    num_steps_trained: 249900
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,25,7335.72,249900,1.87682,10.81,-2.23,90.7909


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 259896
  custom_metrics: {}
  date: 2021-11-14_16-45-53
  done: false
  episode_len_mean: 95.88571428571429
  episode_media: {}
  episode_reward_max: 10.820000000000014
  episode_reward_mean: 1.9579047619047671
  episode_reward_min: -2.0700000000000007
  episodes_this_iter: 105
  episodes_total: 2686
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5596622325416303
          entropy_coeff: 0.01
          kl: 0.015764957250735318
          policy_loss: -0.05896871415420603
          total_loss: 0.16775916708537783
          vf_explained_var: 0.7666202187538147
          vf_loss: 0.21192064354371312
    num_agent_steps_sampled: 259896
    num_agent_steps_trained: 259896
    num_steps_sampled: 259896
    num_steps_trained: 259896
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,26,7618.06,259896,1.9579,10.82,-2.07,95.8857




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 269892
  custom_metrics: {}
  date: 2021-11-14_16-51-04
  done: false
  episode_len_mean: 93.59433962264151
  episode_media: {}
  episode_reward_max: 6.600000000000014
  episode_reward_mean: 1.6884905660377398
  episode_reward_min: -2.2199999999999993
  episodes_this_iter: 106
  episodes_total: 2792
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.573404266895392
          entropy_coeff: 0.01
          kl: 0.014795967476210993
          policy_loss: -0.060399559496814366
          total_loss: 0.1477896484070752
          vf_explained_var: 0.7464619278907776
          vf_loss: 0.1960028048023645
    num_agent_steps_sampled: 269892
    num_agent_steps_trained: 269892
    num_steps_sampled: 269892
    num_steps_trained: 269892
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,27,7929.08,269892,1.68849,6.6,-2.22,93.5943




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 279888
  custom_metrics: {}
  date: 2021-11-14_16-56-02
  done: false
  episode_len_mean: 93.71296296296296
  episode_media: {}
  episode_reward_max: 11.010000000000014
  episode_reward_mean: 2.0086111111111147
  episode_reward_min: -1.800000000000001
  episodes_this_iter: 108
  episodes_total: 2900
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5691857629352146
          entropy_coeff: 0.01
          kl: 0.016589410967632265
          policy_loss: -0.05413030481172933
          total_loss: 0.18442133229751234
          vf_explained_var: 0.7092527151107788
          vf_loss: 0.22172664952758922
    num_agent_steps_sampled: 279888
    num_agent_steps_trained: 279888
    num_steps_sampled: 279888
    num_steps_trained: 279888
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,28,8226.58,279888,2.00861,11.01,-1.8,93.713




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 289884
  custom_metrics: {}
  date: 2021-11-14_17-01-04
  done: false
  episode_len_mean: 94.14150943396227
  episode_media: {}
  episode_reward_max: 14.630000000000013
  episode_reward_mean: 1.8386792452830225
  episode_reward_min: -2.2199999999999984
  episodes_this_iter: 106
  episodes_total: 3006
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.582319697049948
          entropy_coeff: 0.01
          kl: 0.014526438856344704
          policy_loss: -0.057790955503144836
          total_loss: 0.1470491826777052
          vf_explained_var: 0.7188235521316528
          vf_loss: 0.19343366217759683
    num_agent_steps_sampled: 289884
    num_agent_steps_trained: 289884
    num_steps_sampled: 289884
    num_steps_trained: 289884
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,29,8528.88,289884,1.83868,14.63,-2.22,94.1415


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 299880
  custom_metrics: {}
  date: 2021-11-14_17-05-56
  done: false
  episode_len_mean: 94.60952380952381
  episode_media: {}
  episode_reward_max: 8.810000000000011
  episode_reward_mean: 1.920380952380957
  episode_reward_min: -1.7000000000000006
  episodes_this_iter: 105
  episodes_total: 3111
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5851261117519475
          entropy_coeff: 0.01
          kl: 0.014818931295880404
          policy_loss: -0.05378797799348831
          total_loss: 0.16705890245831165
          vf_explained_var: 0.7302387952804565
          vf_loss: 0.2087188432383168
    num_agent_steps_sampled: 299880
    num_agent_steps_trained: 299880
    num_steps_sampled: 299880
    num_steps_trained: 299880
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,30,8820.88,299880,1.92038,8.81,-1.7,94.6095




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 309876
  custom_metrics: {}
  date: 2021-11-14_17-11-07
  done: false
  episode_len_mean: 93.16822429906541
  episode_media: {}
  episode_reward_max: 14.550000000000015
  episode_reward_mean: 2.1513084112149574
  episode_reward_min: -1.9000000000000012
  episodes_this_iter: 107
  episodes_total: 3218
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5808543984706587
          entropy_coeff: 0.01
          kl: 0.016710962400869445
          policy_loss: -0.05846476658510092
          total_loss: 0.18203106402761787
          vf_explained_var: 0.7445911169052124
          vf_loss: 0.22347600632546166
    num_agent_steps_sampled: 309876
    num_agent_steps_trained: 309876
    num_steps_sampled: 309876
    num_steps_trained: 309876
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,31,9132.03,309876,2.15131,14.55,-1.9,93.1682




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 319872
  custom_metrics: {}
  date: 2021-11-14_17-16-08
  done: false
  episode_len_mean: 94.80952380952381
  episode_media: {}
  episode_reward_max: 6.830000000000014
  episode_reward_mean: 1.5287619047619085
  episode_reward_min: -1.9800000000000009
  episodes_this_iter: 105
  episodes_total: 3323
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5896173026826648
          entropy_coeff: 0.01
          kl: 0.014390020027995568
          policy_loss: -0.06033985315201183
          total_loss: 0.15838939999270008
          vf_explained_var: 0.7431181073188782
          vf_loss: 0.20774537968393575
    num_agent_steps_sampled: 319872
    num_agent_steps_trained: 319872
    num_steps_sampled: 319872
    num_steps_trained: 319872
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,32,9433.01,319872,1.52876,6.83,-1.98,94.8095




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 329868
  custom_metrics: {}
  date: 2021-11-14_17-21-05
  done: false
  episode_len_mean: 95.87619047619047
  episode_media: {}
  episode_reward_max: 8.66000000000001
  episode_reward_mean: 2.235238095238101
  episode_reward_min: -1.9600000000000013
  episodes_this_iter: 105
  episodes_total: 3428
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5857653080907643
          entropy_coeff: 0.01
          kl: 0.01626686169534125
          policy_loss: -0.057534125109768325
          total_loss: 0.16942929432202036
          vf_explained_var: 0.7877746820449829
          vf_loss: 0.21113088652093567
    num_agent_steps_sampled: 329868
    num_agent_steps_trained: 329868
    num_steps_sampled: 329868
    num_steps_trained: 329868
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,33,9729.39,329868,2.23524,8.66,-1.96,95.8762




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 339864
  custom_metrics: {}
  date: 2021-11-14_17-26-12
  done: false
  episode_len_mean: 94.98095238095237
  episode_media: {}
  episode_reward_max: 9.010000000000016
  episode_reward_mean: 2.045428571428577
  episode_reward_min: -1.7100000000000006
  episodes_this_iter: 105
  episodes_total: 3533
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.559666298291622
          entropy_coeff: 0.01
          kl: 0.016356336874932775
          policy_loss: -0.062473083486478044
          total_loss: 0.15810741582073462
          vf_explained_var: 0.7880002856254578
          vf_loss: 0.20425766033924417
    num_agent_steps_sampled: 339864
    num_agent_steps_trained: 339864
    num_steps_sampled: 339864
    num_steps_trained: 339864
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,34,10037,339864,2.04543,9.01,-1.71,94.981




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 349860
  custom_metrics: {}
  date: 2021-11-14_17-31-16
  done: false
  episode_len_mean: 94.5
  episode_media: {}
  episode_reward_max: 10.450000000000017
  episode_reward_mean: 1.8481132075471747
  episode_reward_min: -1.980000000000001
  episodes_this_iter: 106
  episodes_total: 3639
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5613448188855097
          entropy_coeff: 0.01
          kl: 0.015348474480575422
          policy_loss: -0.06234284111569261
          total_loss: 0.12560899546389812
          vf_explained_var: 0.7819280624389648
          vf_loss: 0.1742288253039249
    num_agent_steps_sampled: 349860
    num_agent_steps_trained: 349860
    num_steps_sampled: 349860
    num_steps_trained: 349860
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,35,10340.6,349860,1.84811,10.45,-1.98,94.5




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 359856
  custom_metrics: {}
  date: 2021-11-14_17-36-23
  done: false
  episode_len_mean: 96.14423076923077
  episode_media: {}
  episode_reward_max: 10.280000000000017
  episode_reward_mean: 2.070480769230775
  episode_reward_min: -2.259999999999998
  episodes_this_iter: 104
  episodes_total: 3743
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.560037349941384
          entropy_coeff: 0.01
          kl: 0.01489954991172307
          policy_loss: -0.06522251498849632
          total_loss: 0.12923692465306091
          vf_explained_var: 0.749926745891571
          vf_loss: 0.1818738979104365
    num_agent_steps_sampled: 359856
    num_agent_steps_trained: 359856
    num_steps_sampled: 359856
    num_steps_trained: 359856
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,36,10647.6,359856,2.07048,10.28,-2.26,96.1442




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 369852
  custom_metrics: {}
  date: 2021-11-14_17-41-26
  done: false
  episode_len_mean: 95.0
  episode_media: {}
  episode_reward_max: 12.750000000000018
  episode_reward_mean: 2.5010476190476245
  episode_reward_min: -1.7300000000000009
  episodes_this_iter: 105
  episodes_total: 3848
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5612338684562945
          entropy_coeff: 0.01
          kl: 0.016568342916479
          policy_loss: -0.06030153909729969
          total_loss: 0.1614219836365336
          vf_explained_var: 0.8153680562973022
          vf_loss: 0.2048730126924367
    num_agent_steps_sampled: 369852
    num_agent_steps_trained: 369852
    num_steps_sampled: 369852
    num_steps_trained: 369852
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,37,10950.9,369852,2.50105,12.75,-1.73,95




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 379848
  custom_metrics: {}
  date: 2021-11-14_17-46-27
  done: false
  episode_len_mean: 96.09615384615384
  episode_media: {}
  episode_reward_max: 10.300000000000017
  episode_reward_mean: 2.296923076923083
  episode_reward_min: -2.0200000000000005
  episodes_this_iter: 104
  episodes_total: 3952
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5830588850200686
          entropy_coeff: 0.01
          kl: 0.015432534270562591
          policy_loss: -0.058273292255675437
          total_loss: 0.15640810851791762
          vf_explained_var: 0.8080220222473145
          vf_loss: 0.20096009283676808
    num_agent_steps_sampled: 379848
    num_agent_steps_trained: 379848
    num_steps_sampled: 379848
    num_steps_trained: 379848
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,38,11251.7,379848,2.29692,10.3,-2.02,96.0962




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 389844
  custom_metrics: {}
  date: 2021-11-14_17-51-29
  done: false
  episode_len_mean: 97.25242718446601
  episode_media: {}
  episode_reward_max: 10.610000000000017
  episode_reward_mean: 1.9056310679611708
  episode_reward_min: -2.149999999999998
  episodes_this_iter: 103
  episodes_total: 4055
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.583685732091594
          entropy_coeff: 0.01
          kl: 0.015040750830955197
          policy_loss: -0.0636506521055459
          total_loss: 0.13192660332076314
          vf_explained_var: 0.7843947410583496
          vf_loss: 0.18286631515520252
    num_agent_steps_sampled: 389844
    num_agent_steps_trained: 389844
    num_steps_sampled: 389844
    num_steps_trained: 389844
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,39,11553.5,389844,1.90563,10.61,-2.15,97.2524




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 399840
  custom_metrics: {}
  date: 2021-11-14_17-56-31
  done: false
  episode_len_mean: 96.0673076923077
  episode_media: {}
  episode_reward_max: 10.610000000000014
  episode_reward_mean: 1.513173076923081
  episode_reward_min: -2.079999999999999
  episodes_this_iter: 104
  episodes_total: 4159
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.588281627903637
          entropy_coeff: 0.01
          kl: 0.013877036709522603
          policy_loss: -0.06546299894873658
          total_loss: 0.11415193341990822
          vf_explained_var: 0.7516317367553711
          vf_loss: 0.16993242299550365
    num_agent_steps_sampled: 399840
    num_agent_steps_trained: 399840
    num_steps_sampled: 399840
    num_steps_trained: 399840
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,40,11855.1,399840,1.51317,10.61,-2.08,96.0673




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 409836
  custom_metrics: {}
  date: 2021-11-14_18-01-29
  done: false
  episode_len_mean: 95.66666666666667
  episode_media: {}
  episode_reward_max: 11.10000000000001
  episode_reward_mean: 2.126857142857147
  episode_reward_min: -2.1199999999999983
  episodes_this_iter: 105
  episodes_total: 4264
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5762263618982755
          entropy_coeff: 0.01
          kl: 0.015821370600555343
          policy_loss: -0.0641740671528153
          total_loss: 0.12568211444597852
          vf_explained_var: 0.8009827136993408
          vf_loss: 0.17507000355503688
    num_agent_steps_sampled: 409836
    num_agent_steps_trained: 409836
    num_steps_sampled: 409836
    num_steps_trained: 409836
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,41,12153.2,409836,2.12686,11.1,-2.12,95.6667


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 419832
  custom_metrics: {}
  date: 2021-11-14_18-06-20
  done: false
  episode_len_mean: 98.89108910891089
  episode_media: {}
  episode_reward_max: 8.680000000000014
  episode_reward_mean: 1.9891089108910942
  episode_reward_min: -2.0700000000000007
  episodes_this_iter: 101
  episodes_total: 4365
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5894092527210204
          entropy_coeff: 0.01
          kl: 0.015754456086417484
          policy_loss: -0.06015002717518717
          total_loss: 0.13434963416483284
          vf_explained_var: 0.7770410776138306
          vf_loss: 0.18001680749221743
    num_agent_steps_sampled: 419832
    num_agent_steps_trained: 419832
    num_steps_sampled: 419832
    num_steps_trained: 419832
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,42,12444.8,419832,1.98911,8.68,-2.07,98.8911


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 429828
  custom_metrics: {}
  date: 2021-11-14_18-11-11
  done: false
  episode_len_mean: 97.96078431372548
  episode_media: {}
  episode_reward_max: 10.490000000000016
  episode_reward_mean: 1.9486274509803974
  episode_reward_min: -1.980000000000001
  episodes_this_iter: 102
  episodes_total: 4467
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5749557081450765
          entropy_coeff: 0.01
          kl: 0.014576571356247278
          policy_loss: -0.06364019793243363
          total_loss: 0.11612465370756885
          vf_explained_var: 0.8113839626312256
          vf_loss: 0.16815625105658147
    num_agent_steps_sampled: 429828
    num_agent_steps_trained: 429828
    num_steps_sampled: 429828
    num_steps_trained: 429828
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,43,12735.4,429828,1.94863,10.49,-1.98,97.9608




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 439824
  custom_metrics: {}
  date: 2021-11-14_18-16-49
  done: false
  episode_len_mean: 94.95192307692308
  episode_media: {}
  episode_reward_max: 12.660000000000014
  episode_reward_mean: 2.071250000000005
  episode_reward_min: -2.219999999999998
  episodes_this_iter: 104
  episodes_total: 4571
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5755681451569257
          entropy_coeff: 0.01
          kl: 0.015923553186511327
          policy_loss: -0.06246066974798361
          total_loss: 0.1371981978870164
          vf_explained_var: 0.7613310813903809
          vf_loss: 0.18460422446712468
    num_agent_steps_sampled: 439824
    num_agent_steps_trained: 439824
    num_steps_sampled: 439824
    num_steps_trained: 439824
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,44,13073,439824,2.07125,12.66,-2.22,94.9519


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 449820
  custom_metrics: {}
  date: 2021-11-14_18-21-50
  done: false
  episode_len_mean: 98.46601941747574
  episode_media: {}
  episode_reward_max: 10.680000000000016
  episode_reward_mean: 2.037281553398064
  episode_reward_min: -1.7400000000000009
  episodes_this_iter: 103
  episodes_total: 4674
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5734053449753005
          entropy_coeff: 0.01
          kl: 0.014926903928372641
          policy_loss: -0.06521144844949818
          total_loss: 0.11814858285256494
          vf_explained_var: 0.7864267230033875
          vf_loss: 0.17083806361223006
    num_agent_steps_sampled: 449820
    num_agent_steps_trained: 449820
    num_steps_sampled: 449820
    num_steps_trained: 449820
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,45,13374.9,449820,2.03728,10.68,-1.74,98.466




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 459816
  custom_metrics: {}
  date: 2021-11-14_18-27-04
  done: false
  episode_len_mean: 96.67961165048544
  episode_media: {}
  episode_reward_max: 11.010000000000012
  episode_reward_mean: 2.4835922330097144
  episode_reward_min: -1.880000000000001
  episodes_this_iter: 103
  episodes_total: 4777
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5662724315610705
          entropy_coeff: 0.01
          kl: 0.016925706331568768
          policy_loss: -0.06522116734622381
          total_loss: 0.1492521928722819
          vf_explained_var: 0.7851283550262451
          vf_loss: 0.1967573508524742
    num_agent_steps_sampled: 459816
    num_agent_steps_trained: 459816
    num_steps_sampled: 459816
    num_steps_trained: 459816
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,46,13688.8,459816,2.48359,11.01,-1.88,96.6796




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 469812
  custom_metrics: {}
  date: 2021-11-14_18-32-27
  done: false
  episode_len_mean: 96.80582524271844
  episode_media: {}
  episode_reward_max: 16.419999999999963
  episode_reward_mean: 1.8071844660194212
  episode_reward_min: -1.730000000000001
  episodes_this_iter: 103
  episodes_total: 4880
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.56159122693233
          entropy_coeff: 0.01
          kl: 0.016070698658833087
          policy_loss: -0.06289070767119655
          total_loss: 0.1547669149748185
          vf_explained_var: 0.781723141670227
          vf_loss: 0.20208609324609303
    num_agent_steps_sampled: 469812
    num_agent_steps_trained: 469812
    num_steps_sampled: 469812
    num_steps_trained: 469812
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,47,14010.9,469812,1.80718,16.42,-1.73,96.8058




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 479808
  custom_metrics: {}
  date: 2021-11-14_18-37-56
  done: false
  episode_len_mean: 96.96116504854369
  episode_media: {}
  episode_reward_max: 10.470000000000013
  episode_reward_mean: 2.2660194174757335
  episode_reward_min: -1.7400000000000009
  episodes_this_iter: 103
  episodes_total: 4983
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.568333717696687
          entropy_coeff: 0.01
          kl: 0.016492835037834486
          policy_loss: -0.0642034399163965
          total_loss: 0.14482020382554486
          vf_explained_var: 0.7662680149078369
          vf_loss: 0.19243764904622213
    num_agent_steps_sampled: 479808
    num_agent_steps_trained: 479808
    num_steps_sampled: 479808
    num_steps_trained: 479808
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,48,14339.9,479808,2.26602,10.47,-1.74,96.9612


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 489804
  custom_metrics: {}
  date: 2021-11-14_18-42-56
  done: false
  episode_len_mean: 96.64423076923077
  episode_media: {}
  episode_reward_max: 12.420000000000012
  episode_reward_mean: 2.8210576923076993
  episode_reward_min: -2.0099999999999993
  episodes_this_iter: 104
  episodes_total: 5087
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.555722782652602
          entropy_coeff: 0.01
          kl: 0.015873147133034112
          policy_loss: -0.06911472527222692
          total_loss: 0.11072130407421635
          vf_explained_var: 0.8455419540405273
          vf_loss: 0.16471211756300977
    num_agent_steps_sampled: 489804
    num_agent_steps_trained: 489804
    num_steps_sampled: 489804
    num_steps_trained: 489804
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,49,14639.9,489804,2.82106,12.42,-2.01,96.6442


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 499800
  custom_metrics: {}
  date: 2021-11-14_18-47-52
  done: false
  episode_len_mean: 99.77
  episode_media: {}
  episode_reward_max: 8.490000000000016
  episode_reward_mean: 2.142900000000006
  episode_reward_min: -1.830000000000001
  episodes_this_iter: 100
  episodes_total: 5187
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.56054032765902
          entropy_coeff: 0.01
          kl: 0.01662307405462549
          policy_loss: -0.06482438477767138
          total_loss: 0.14611082546389065
          vf_explained_var: 0.8170629739761353
          vf_loss: 0.1939374939005217
    num_agent_steps_sampled: 499800
    num_agent_steps_trained: 499800
    num_steps_sampled: 499800
    num_steps_trained: 499800
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,50,14936,499800,2.1429,8.49,-1.83,99.77




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 509796
  custom_metrics: {}
  date: 2021-11-14_18-53-15
  done: false
  episode_len_mean: 94.64761904761905
  episode_media: {}
  episode_reward_max: 6.9000000000000155
  episode_reward_mean: 1.6574285714285755
  episode_reward_min: -1.9800000000000009
  episodes_this_iter: 105
  episodes_total: 5292
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5728517648501272
          entropy_coeff: 0.01
          kl: 0.015414993540687138
          policy_loss: -0.06842817571252967
          total_loss: 0.12108448853210793
          vf_explained_var: 0.7387208938598633
          vf_loss: 0.17573424040212526
    num_agent_steps_sampled: 509796
    num_agent_steps_trained: 509796
    num_steps_sampled: 509796
    num_steps_trained: 509796
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,51,15259.1,509796,1.65743,6.9,-1.98,94.6476




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 519792
  custom_metrics: {}
  date: 2021-11-14_18-58-46
  done: false
  episode_len_mean: 97.41176470588235
  episode_media: {}
  episode_reward_max: 11.060000000000013
  episode_reward_mean: 2.326764705882358
  episode_reward_min: -1.790000000000001
  episodes_this_iter: 102
  episodes_total: 5394
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5635588821182904
          entropy_coeff: 0.01
          kl: 0.01670597467390989
          policy_loss: -0.06633417455750143
          total_loss: 0.12356031411924423
          vf_explained_var: 0.7999658584594727
          vf_loss: 0.1727144927618245
    num_agent_steps_sampled: 519792
    num_agent_steps_trained: 519792
    num_steps_sampled: 519792
    num_steps_trained: 519792
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,52,15590.2,519792,2.32676,11.06,-1.79,97.4118


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 529788
  custom_metrics: {}
  date: 2021-11-14_19-03-44
  done: false
  episode_len_mean: 97.47572815533981
  episode_media: {}
  episode_reward_max: 9.100000000000012
  episode_reward_mean: 1.8826213592233056
  episode_reward_min: -1.9400000000000008
  episodes_this_iter: 103
  episodes_total: 5497
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5643992714392834
          entropy_coeff: 0.01
          kl: 0.01694179143524277
          policy_loss: -0.0630826420421338
          total_loss: 0.1367810504273193
          vf_explained_var: 0.8140629529953003
          vf_loss: 0.18208772794216171
    num_agent_steps_sampled: 529788
    num_agent_steps_trained: 529788
    num_steps_sampled: 529788
    num_steps_trained: 529788
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,53,15888,529788,1.88262,9.1,-1.94,97.4757


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 539784
  custom_metrics: {}
  date: 2021-11-14_19-08-40
  done: false
  episode_len_mean: 99.01960784313725
  episode_media: {}
  episode_reward_max: 10.250000000000016
  episode_reward_mean: 2.2787254901960847
  episode_reward_min: -1.7200000000000009
  episodes_this_iter: 102
  episodes_total: 5599
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5650242653667417
          entropy_coeff: 0.01
          kl: 0.015093052584488913
          policy_loss: -0.07204266106908838
          total_loss: 0.09134344308175402
          vf_explained_var: 0.8394483327865601
          vf_loss: 0.15035450451800392
    num_agent_steps_sampled: 539784
    num_agent_steps_trained: 539784
    num_steps_sampled: 539784
    num_steps_trained: 539784
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,54,16184.1,539784,2.27873,10.25,-1.72,99.0196




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 549780
  custom_metrics: {}
  date: 2021-11-14_19-13-50
  done: false
  episode_len_mean: 95.25961538461539
  episode_media: {}
  episode_reward_max: 7.240000000000014
  episode_reward_mean: 1.8837500000000058
  episode_reward_min: -2.11
  episodes_this_iter: 104
  episodes_total: 5703
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5664462346297046
          entropy_coeff: 0.01
          kl: 0.016794230592454523
          policy_loss: -0.07028174814171134
          total_loss: 0.11398230192219663
          vf_explained_var: 0.8116009831428528
          vf_loss: 0.16688673812617413
    num_agent_steps_sampled: 549780
    num_agent_steps_trained: 549780
    num_steps_sampled: 549780
    num_steps_trained: 549780
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,55,16493.7,549780,1.88375,7.24,-2.11,95.2596




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 559776
  custom_metrics: {}
  date: 2021-11-14_19-19-03
  done: false
  episode_len_mean: 95.75961538461539
  episode_media: {}
  episode_reward_max: 10.310000000000016
  episode_reward_mean: 2.2206730769230827
  episode_reward_min: -1.9400000000000013
  episodes_this_iter: 104
  episodes_total: 5807
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.570980368822049
          entropy_coeff: 0.01
          kl: 0.01622199195330858
          policy_loss: -0.06567862729987718
          total_loss: 0.11127333860629453
          vf_explained_var: 0.8532184958457947
          vf_loss: 0.16108658016125998
    num_agent_steps_sampled: 559776
    num_agent_steps_trained: 559776
    num_steps_sampled: 559776
    num_steps_trained: 559776
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,56,16807.2,559776,2.22067,10.31,-1.94,95.7596




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 569772
  custom_metrics: {}
  date: 2021-11-14_19-24-16
  done: false
  episode_len_mean: 98.33980582524272
  episode_media: {}
  episode_reward_max: 12.800000000000015
  episode_reward_mean: 2.0951456310679664
  episode_reward_min: -1.8800000000000012
  episodes_this_iter: 103
  episodes_total: 5910
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5658853833491984
          entropy_coeff: 0.01
          kl: 0.01619378611772238
          policy_loss: -0.06474879480516299
          total_loss: 0.1200120730795221
          vf_explained_var: 0.7873438000679016
          vf_loss: 0.16891682003107336
    num_agent_steps_sampled: 569772
    num_agent_steps_trained: 569772
    num_steps_sampled: 569772
    num_steps_trained: 569772
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,57,17119.8,569772,2.09515,12.8,-1.88,98.3398




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 579768
  custom_metrics: {}
  date: 2021-11-14_19-29-31
  done: false
  episode_len_mean: 95.54368932038835
  episode_media: {}
  episode_reward_max: 8.620000000000017
  episode_reward_mean: 2.3000970873786466
  episode_reward_min: -1.6600000000000008
  episodes_this_iter: 103
  episodes_total: 6013
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5499601354965797
          entropy_coeff: 0.01
          kl: 0.015548596245024046
          policy_loss: -0.06934854329452236
          total_loss: 0.09234743708720765
          vf_explained_var: 0.860692024230957
          vf_loss: 0.14734623023054094
    num_agent_steps_sampled: 579768
    num_agent_steps_trained: 579768
    num_steps_sampled: 579768
    num_steps_trained: 579768
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,58,17435.2,579768,2.3001,8.62,-1.66,95.5437




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 589764
  custom_metrics: {}
  date: 2021-11-14_19-35-11
  done: false
  episode_len_mean: 97.36893203883496
  episode_media: {}
  episode_reward_max: 9.760000000000002
  episode_reward_mean: 2.061650485436899
  episode_reward_min: -1.7300000000000004
  episodes_this_iter: 103
  episodes_total: 6116
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5746788846121893
          entropy_coeff: 0.01
          kl: 0.016106837603738735
          policy_loss: -0.0678455212006234
          total_loss: 0.12461793024140673
          vf_explained_var: 0.7892800569534302
          vf_loss: 0.17693017796796356
    num_agent_steps_sampled: 589764
    num_agent_steps_trained: 589764
    num_steps_sampled: 589764
    num_steps_trained: 589764
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,59,17775.1,589764,2.06165,9.76,-1.73,97.3689


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 599760
  custom_metrics: {}
  date: 2021-11-14_19-40-04
  done: false
  episode_len_mean: 99.27450980392157
  episode_media: {}
  episode_reward_max: 10.34000000000002
  episode_reward_mean: 2.465294117647065
  episode_reward_min: -2.0599999999999987
  episodes_this_iter: 102
  episodes_total: 6218
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.577608354275043
          entropy_coeff: 0.01
          kl: 0.015623231323259202
          policy_loss: -0.06602522920833057
          total_loss: 0.08886749180328324
          vf_explained_var: 0.8619842529296875
          vf_loss: 0.14062817245522816
    num_agent_steps_sampled: 599760
    num_agent_steps_trained: 599760
    num_steps_sampled: 599760
    num_steps_trained: 599760
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,60,18068.2,599760,2.46529,10.34,-2.06,99.2745


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 609756
  custom_metrics: {}
  date: 2021-11-14_19-45-01
  done: false
  episode_len_mean: 97.45098039215686
  episode_media: {}
  episode_reward_max: 8.550000000000017
  episode_reward_mean: 2.2210784313725553
  episode_reward_min: -1.9000000000000008
  episodes_this_iter: 102
  episodes_total: 6320
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5674553842626064
          entropy_coeff: 0.01
          kl: 0.015870046660469216
          policy_loss: -0.06735056701641626
          total_loss: 0.10089946897485508
          vf_explained_var: 0.818464457988739
          vf_loss: 0.153251397352602
    num_agent_steps_sampled: 609756
    num_agent_steps_trained: 609756
    num_steps_sampled: 609756
    num_steps_trained: 609756
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,61,18364.8,609756,2.22108,8.55,-1.9,97.451




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 619752
  custom_metrics: {}
  date: 2021-11-14_19-50-28
  done: false
  episode_len_mean: 95.62857142857143
  episode_media: {}
  episode_reward_max: 8.980000000000013
  episode_reward_mean: 2.142000000000006
  episode_reward_min: -1.7700000000000007
  episodes_this_iter: 105
  episodes_total: 6425
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5732113941102965
          entropy_coeff: 0.01
          kl: 0.014286047546309855
          policy_loss: -0.06830767460135567
          total_loss: 0.09491109809655154
          vf_explained_var: 0.8274552226066589
          vf_loss: 0.15233730995454467
    num_agent_steps_sampled: 619752
    num_agent_steps_trained: 619752
    num_steps_sampled: 619752
    num_steps_trained: 619752
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,62,18691.8,619752,2.142,8.98,-1.77,95.6286




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 629748
  custom_metrics: {}
  date: 2021-11-14_19-55-37
  done: false
  episode_len_mean: 96.2135922330097
  episode_media: {}
  episode_reward_max: 10.960000000000013
  episode_reward_mean: 2.3939805825242777
  episode_reward_min: -2.0600000000000005
  episodes_this_iter: 103
  episodes_total: 6528
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.578381388819116
          entropy_coeff: 0.01
          kl: 0.016696693948290455
          policy_loss: -0.06768832699570837
          total_loss: 0.11273377570592297
          vf_explained_var: 0.8134579062461853
          vf_loss: 0.1634141177368852
    num_agent_steps_sampled: 629748
    num_agent_steps_trained: 629748
    num_steps_sampled: 629748
    num_steps_trained: 629748
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,63,19000.7,629748,2.39398,10.96,-2.06,96.2136


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 639744
  custom_metrics: {}
  date: 2021-11-14_20-00-36
  done: false
  episode_len_mean: 96.27619047619048
  episode_media: {}
  episode_reward_max: 10.750000000000016
  episode_reward_mean: 2.2942857142857194
  episode_reward_min: -2.05
  episodes_this_iter: 105
  episodes_total: 6633
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5866217356461747
          entropy_coeff: 0.01
          kl: 0.015851836141782833
          policy_loss: -0.06924217230696073
          total_loss: 0.10923908247779578
          vf_explained_var: 0.808504581451416
          vf_loss: 0.1637209505865621
    num_agent_steps_sampled: 639744
    num_agent_steps_trained: 639744
    num_steps_sampled: 639744
    num_steps_trained: 639744
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,64,19299.7,639744,2.29429,10.75,-2.05,96.2762




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 649740
  custom_metrics: {}
  date: 2021-11-14_20-05-46
  done: false
  episode_len_mean: 96.35922330097087
  episode_media: {}
  episode_reward_max: 9.680000000000023
  episode_reward_mean: 2.073883495145636
  episode_reward_min: -1.9400000000000008
  episodes_this_iter: 103
  episodes_total: 6736
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5912129587597317
          entropy_coeff: 0.01
          kl: 0.016258706446796167
          policy_loss: -0.07003343247482155
          total_loss: 0.0945149544148873
          vf_explained_var: 0.8152331113815308
          vf_loss: 0.14879123152384902
    num_agent_steps_sampled: 649740
    num_agent_steps_trained: 649740
    num_steps_sampled: 649740
    num_steps_trained: 649740
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,65,19609.3,649740,2.07388,9.68,-1.94,96.3592




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 659736
  custom_metrics: {}
  date: 2021-11-14_20-11-11
  done: false
  episode_len_mean: 96.06666666666666
  episode_media: {}
  episode_reward_max: 8.680000000000009
  episode_reward_mean: 1.9820000000000046
  episode_reward_min: -2.2699999999999996
  episodes_this_iter: 105
  episodes_total: 6841
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5786462116445232
          entropy_coeff: 0.01
          kl: 0.015405636741231805
          policy_loss: -0.06927366828780748
          total_loss: 0.0832186771879116
          vf_explained_var: 0.7996466159820557
          vf_loss: 0.13879584612476073
    num_agent_steps_sampled: 659736
    num_agent_steps_trained: 659736
    num_steps_sampled: 659736
    num_steps_trained: 659736
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,66,19934.3,659736,1.982,8.68,-2.27,96.0667


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 669732
  custom_metrics: {}
  date: 2021-11-14_20-16-10
  done: false
  episode_len_mean: 95.84466019417475
  episode_media: {}
  episode_reward_max: 12.470000000000017
  episode_reward_mean: 2.8654368932038907
  episode_reward_min: -2.0200000000000005
  episodes_this_iter: 103
  episodes_total: 6944
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.54894303012098
          entropy_coeff: 0.01
          kl: 0.017094502995786808
          policy_loss: -0.06646150944706722
          total_loss: 0.12670183853628353
          vf_explained_var: 0.8057805299758911
          vf_loss: 0.17484143747128228
    num_agent_steps_sampled: 669732
    num_agent_steps_trained: 669732
    num_steps_sampled: 669732
    num_steps_trained: 669732
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,67,20234,669732,2.86544,12.47,-2.02,95.8447




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 679728
  custom_metrics: {}
  date: 2021-11-14_20-21-23
  done: false
  episode_len_mean: 96.9126213592233
  episode_media: {}
  episode_reward_max: 10.130000000000019
  episode_reward_mean: 2.189611650485443
  episode_reward_min: -1.9200000000000013
  episodes_this_iter: 103
  episodes_total: 7047
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.592794686810583
          entropy_coeff: 0.01
          kl: 0.016309693196522163
          policy_loss: -0.06459569484691939
          total_loss: 0.10561659728718173
          vf_explained_var: 0.8365724682807922
          vf_loss: 0.15434027943704437
    num_agent_steps_sampled: 679728
    num_agent_steps_trained: 679728
    num_steps_sampled: 679728
    num_steps_trained: 679728
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,68,20546.6,679728,2.18961,10.13,-1.92,96.9126




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 689724
  custom_metrics: {}
  date: 2021-11-14_20-26-39
  done: false
  episode_len_mean: 95.01904761904763
  episode_media: {}
  episode_reward_max: 10.380000000000017
  episode_reward_mean: 1.9444761904761956
  episode_reward_min: -1.930000000000001
  episodes_this_iter: 105
  episodes_total: 7152
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5806669255607146
          entropy_coeff: 0.01
          kl: 0.015965262829195307
          policy_loss: -0.07053364888549997
          total_loss: 0.08779958554217195
          vf_explained_var: 0.8480408191680908
          vf_loss: 0.14322268257204157
    num_agent_steps_sampled: 689724
    num_agent_steps_trained: 689724
    num_steps_sampled: 689724
    num_steps_trained: 689724
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,69,20862.2,689724,1.94448,10.38,-1.93,95.019




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 699720
  custom_metrics: {}
  date: 2021-11-14_20-31-54
  done: false
  episode_len_mean: 95.49038461538461
  episode_media: {}
  episode_reward_max: 10.620000000000015
  episode_reward_mean: 2.8697115384615453
  episode_reward_min: -1.9400000000000004
  episodes_this_iter: 104
  episodes_total: 7256
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.573420423320216
          entropy_coeff: 0.01
          kl: 0.01689436132807545
          policy_loss: -0.06621135098413906
          total_loss: 0.124009865651337
          vf_explained_var: 0.8319177627563477
          vf_loss: 0.172657021214692
    num_agent_steps_sampled: 699720
    num_agent_steps_trained: 699720
    num_steps_sampled: 699720
    num_steps_trained: 699720
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,70,21177.3,699720,2.86971,10.62,-1.94,95.4904


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 709716
  custom_metrics: {}
  date: 2021-11-14_20-36-57
  done: false
  episode_len_mean: 96.34285714285714
  episode_media: {}
  episode_reward_max: 10.03000000000002
  episode_reward_mean: 2.502952380952388
  episode_reward_min: -2.289999999999997
  episodes_this_iter: 105
  episodes_total: 7361
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.569561297363705
          entropy_coeff: 0.01
          kl: 0.01693936905399794
          policy_loss: -0.06582358897400972
          total_loss: 0.12797118104102775
          vf_explained_var: 0.8476150631904602
          vf_loss: 0.17607663498403361
    num_agent_steps_sampled: 709716
    num_agent_steps_trained: 709716
    num_steps_sampled: 709716
    num_steps_trained: 709716
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,71,21480.9,709716,2.50295,10.03,-2.29,96.3429


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 719712
  custom_metrics: {}
  date: 2021-11-14_20-42-01
  done: false
  episode_len_mean: 96.69902912621359
  episode_media: {}
  episode_reward_max: 7.000000000000013
  episode_reward_mean: 2.4134951456310745
  episode_reward_min: -1.6500000000000008
  episodes_this_iter: 103
  episodes_total: 7464
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5836569475312516
          entropy_coeff: 0.01
          kl: 0.015586002035711635
          policy_loss: -0.06949519178089805
          total_loss: 0.10087147371079296
          vf_explained_var: 0.8448976874351501
          vf_loss: 0.15625801714909318
    num_agent_steps_sampled: 719712
    num_agent_steps_trained: 719712
    num_steps_sampled: 719712
    num_steps_trained: 719712
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,72,21784.5,719712,2.4135,7,-1.65,96.699




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 729708
  custom_metrics: {}
  date: 2021-11-14_20-47-26
  done: false
  episode_len_mean: 95.05714285714286
  episode_media: {}
  episode_reward_max: 8.620000000000017
  episode_reward_mean: 1.953809523809529
  episode_reward_min: -2.499999999999997
  episodes_this_iter: 105
  episodes_total: 7569
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5894199455905165
          entropy_coeff: 0.01
          kl: 0.014944076130111963
          policy_loss: -0.07329953429766764
          total_loss: 0.06880934273179334
          vf_explained_var: 0.8559563159942627
          vf_loss: 0.12970304465812876
    num_agent_steps_sampled: 729708
    num_agent_steps_trained: 729708
    num_steps_sampled: 729708
    num_steps_trained: 729708
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,73,22109.7,729708,1.95381,8.62,-2.5,95.0571


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 739704
  custom_metrics: {}
  date: 2021-11-14_20-52-24
  done: false
  episode_len_mean: 97.79611650485437
  episode_media: {}
  episode_reward_max: 10.740000000000009
  episode_reward_mean: 2.2873786407767054
  episode_reward_min: -1.960000000000001
  episodes_this_iter: 103
  episodes_total: 7672
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5809709360456874
          entropy_coeff: 0.01
          kl: 0.015426617190976915
          policy_loss: -0.06953756326857286
          total_loss: 0.08538246704464476
          vf_explained_var: 0.8515567779541016
          vf_loss: 0.141193008938661
    num_agent_steps_sampled: 739704
    num_agent_steps_trained: 739704
    num_steps_sampled: 739704
    num_steps_trained: 739704
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,74,22407.3,739704,2.28738,10.74,-1.96,97.7961




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 749700
  custom_metrics: {}
  date: 2021-11-14_20-57-33
  done: false
  episode_len_mean: 95.85576923076923
  episode_media: {}
  episode_reward_max: 10.650000000000015
  episode_reward_mean: 2.5858653846153907
  episode_reward_min: -2.1199999999999983
  episodes_this_iter: 104
  episodes_total: 7776
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5740628825293648
          entropy_coeff: 0.01
          kl: 0.017030476936525613
          policy_loss: -0.06495091015250128
          total_loss: 0.1271154412613688
          vf_explained_var: 0.8299260139465332
          vf_loss: 0.1741597313258765
    num_agent_steps_sampled: 749700
    num_agent_steps_trained: 749700
    num_steps_sampled: 749700
    num_steps_trained: 749700
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,75,22716.6,749700,2.58587,10.65,-2.12,95.8558




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 759696
  custom_metrics: {}
  date: 2021-11-14_21-02-59
  done: false
  episode_len_mean: 94.95283018867924
  episode_media: {}
  episode_reward_max: 12.290000000000015
  episode_reward_mean: 2.551886792452837
  episode_reward_min: -1.930000000000001
  episodes_this_iter: 106
  episodes_total: 7882
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5888608738907384
          entropy_coeff: 0.01
          kl: 0.01637651877262982
          policy_loss: -0.06449037402167788
          total_loss: 0.1243098045946059
          vf_explained_var: 0.8341062664985657
          vf_loss: 0.17271756246232262
    num_agent_steps_sampled: 759696
    num_agent_steps_trained: 759696
    num_steps_sampled: 759696
    num_steps_trained: 759696
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,76,23042.6,759696,2.55189,12.29,-1.93,94.9528




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 769692
  custom_metrics: {}
  date: 2021-11-14_21-08-09
  done: false
  episode_len_mean: 97.11764705882354
  episode_media: {}
  episode_reward_max: 13.760000000000021
  episode_reward_mean: 2.3139215686274563
  episode_reward_min: -1.9800000000000009
  episodes_this_iter: 102
  episodes_total: 7984
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5692642295462456
          entropy_coeff: 0.01
          kl: 0.017557471939713954
          policy_loss: -0.0669526539868317
          total_loss: 0.11068879303633848
          vf_explained_var: 0.8261582851409912
          vf_loss: 0.1583362101756323
    num_agent_steps_sampled: 769692
    num_agent_steps_trained: 769692
    num_steps_sampled: 769692
    num_steps_trained: 769692
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,77,23352.7,769692,2.31392,13.76,-1.98,97.1176


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 779688
  custom_metrics: {}
  date: 2021-11-14_21-13-05
  done: false
  episode_len_mean: 97.7156862745098
  episode_media: {}
  episode_reward_max: 10.660000000000013
  episode_reward_mean: 2.457450980392163
  episode_reward_min: -1.7300000000000006
  episodes_this_iter: 102
  episodes_total: 8086
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5721633832678834
          entropy_coeff: 0.01
          kl: 0.015394110705207952
          policy_loss: -0.06792275333562149
          total_loss: 0.09927897583534065
          vf_explained_var: 0.8573051691055298
          vf_loss: 0.1534699418510382
    num_agent_steps_sampled: 779688
    num_agent_steps_trained: 779688
    num_steps_sampled: 779688
    num_steps_trained: 779688
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,78,23648.8,779688,2.45745,10.66,-1.73,97.7157




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 789684
  custom_metrics: {}
  date: 2021-11-14_21-18-34
  done: false
  episode_len_mean: 96.47115384615384
  episode_media: {}
  episode_reward_max: 8.770000000000014
  episode_reward_mean: 2.547980769230776
  episode_reward_min: -1.5600000000000007
  episodes_this_iter: 104
  episodes_total: 8190
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5671478144124023
          entropy_coeff: 0.01
          kl: 0.015829267860210976
          policy_loss: -0.0702207419472054
          total_loss: 0.09190415110892783
          vf_explained_var: 0.8591721057891846
          vf_loss: 0.14722768963298674
    num_agent_steps_sampled: 789684
    num_agent_steps_trained: 789684
    num_steps_sampled: 789684
    num_steps_trained: 789684
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,79,23977.1,789684,2.54798,8.77,-1.56,96.4712




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 799680
  custom_metrics: {}
  date: 2021-11-14_21-23-56
  done: false
  episode_len_mean: 96.4368932038835
  episode_media: {}
  episode_reward_max: 22.629999999999942
  episode_reward_mean: 2.813592233009715
  episode_reward_min: -2.000000000000001
  episodes_this_iter: 103
  episodes_total: 8293
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.542233043552464
          entropy_coeff: 0.01
          kl: 0.017930687494531028
          policy_loss: -0.06433693567434183
          total_loss: 0.14079008247209793
          vf_explained_var: 0.8362500071525574
          vf_loss: 0.1845949586492796
    num_agent_steps_sampled: 799680
    num_agent_steps_trained: 799680
    num_steps_sampled: 799680
    num_steps_trained: 799680
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,80,24299.7,799680,2.81359,22.63,-2,96.4369


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 809676
  custom_metrics: {}
  date: 2021-11-14_21-28-53
  done: false
  episode_len_mean: 98.25490196078431
  episode_media: {}
  episode_reward_max: 10.120000000000019
  episode_reward_mean: 2.213823529411771
  episode_reward_min: -2.000000000000001
  episodes_this_iter: 102
  episodes_total: 8395
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.576300395350171
          entropy_coeff: 0.01
          kl: 0.015227908517466587
          policy_loss: -0.06967977848238288
          total_loss: 0.08132910644317157
          vf_explained_var: 0.8351843357086182
          vf_loss: 0.13774442659794456
    num_agent_steps_sampled: 809676
    num_agent_steps_trained: 809676
    num_steps_sampled: 809676
    num_steps_trained: 809676
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,81,24596,809676,2.21382,10.12,-2,98.2549


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 819672
  custom_metrics: {}
  date: 2021-11-14_21-33-50
  done: false
  episode_len_mean: 98.3529411764706
  episode_media: {}
  episode_reward_max: 14.840000000000016
  episode_reward_mean: 2.4992156862745167
  episode_reward_min: -2.2899999999999987
  episodes_this_iter: 102
  episodes_total: 8497
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5757380476364724
          entropy_coeff: 0.01
          kl: 0.016510430015493755
          policy_loss: -0.06827637946121713
          total_loss: 0.1063622707922935
          vf_explained_var: 0.8549844026565552
          vf_loss: 0.1580816057144513
    num_agent_steps_sampled: 819672
    num_agent_steps_trained: 819672
    num_steps_sampled: 819672
    num_steps_trained: 819672
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,82,24893.5,819672,2.49922,14.84,-2.29,98.3529




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 829668
  custom_metrics: {}
  date: 2021-11-14_21-39-15
  done: false
  episode_len_mean: 97.59803921568627
  episode_media: {}
  episode_reward_max: 14.210000000000022
  episode_reward_mean: 2.425000000000007
  episode_reward_min: -1.870000000000001
  episodes_this_iter: 102
  episodes_total: 8599
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5728607325472383
          entropy_coeff: 0.01
          kl: 0.018110347992763513
          policy_loss: -0.06310297990074525
          total_loss: 0.1274685095506123
          vf_explained_var: 0.8193308115005493
          vf_loss: 0.16988525706319474
    num_agent_steps_sampled: 829668
    num_agent_steps_trained: 829668
    num_steps_sampled: 829668
    num_steps_trained: 829668
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,83,25218.1,829668,2.425,14.21,-1.87,97.598




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 839664
  custom_metrics: {}
  date: 2021-11-14_21-44-25
  done: false
  episode_len_mean: 97.1470588235294
  episode_media: {}
  episode_reward_max: 16.189999999999944
  episode_reward_mean: 2.767647058823535
  episode_reward_min: -2.32
  episodes_this_iter: 102
  episodes_total: 8701
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5687695915882403
          entropy_coeff: 0.01
          kl: 0.015749921045689005
          policy_loss: -0.0669032722337442
          total_loss: 0.10493878967399335
          vf_explained_var: 0.8510072827339172
          vf_loss: 0.15716443417840598
    num_agent_steps_sampled: 839664
    num_agent_steps_trained: 839664
    num_steps_sampled: 839664
    num_steps_trained: 839664
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,84,25527.7,839664,2.76765,16.19,-2.32,97.1471


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 849660
  custom_metrics: {}
  date: 2021-11-14_21-49-20
  done: false
  episode_len_mean: 98.71844660194175
  episode_media: {}
  episode_reward_max: 10.190000000000017
  episode_reward_mean: 2.6672815533980647
  episode_reward_min: -2.3599999999999985
  episodes_this_iter: 103
  episodes_total: 8804
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.557832327153948
          entropy_coeff: 0.01
          kl: 0.017148791912527415
          policy_loss: -0.06969668572215347
          total_loss: 0.10737228376122239
          vf_explained_var: 0.8593811392784119
          vf_loss: 0.1586968161042334
    num_agent_steps_sampled: 849660
    num_agent_steps_trained: 849660
    num_steps_sampled: 849660
    num_steps_trained: 849660
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,85,25822.7,849660,2.66728,10.19,-2.36,98.7184




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 859656
  custom_metrics: {}
  date: 2021-11-14_21-54-41
  done: false
  episode_len_mean: 97.5
  episode_media: {}
  episode_reward_max: 12.480000000000018
  episode_reward_mean: 2.560294117647065
  episode_reward_min: -2.080000000000001
  episodes_this_iter: 102
  episodes_total: 8906
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5740565224590464
          entropy_coeff: 0.01
          kl: 0.017358027171041977
          policy_loss: -0.06816464244770125
          total_loss: 0.11332297910036694
          vf_explained_var: 0.8482249975204468
          vf_loss: 0.1627414627510131
    num_agent_steps_sampled: 859656
    num_agent_steps_trained: 859656
    num_steps_sampled: 859656
    num_steps_trained: 859656
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,86,26143.8,859656,2.56029,12.48,-2.08,97.5




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 869652
  custom_metrics: {}
  date: 2021-11-14_21-59-52
  done: false
  episode_len_mean: 96.45192307692308
  episode_media: {}
  episode_reward_max: 12.890000000000017
  episode_reward_mean: 2.681057692307699
  episode_reward_min: -1.800000000000001
  episodes_this_iter: 104
  episodes_total: 9010
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5707016948960786
          entropy_coeff: 0.01
          kl: 0.017455821024947207
          policy_loss: -0.0617071521985862
          total_loss: 0.12781486444764284
          vf_explained_var: 0.8717349767684937
          vf_loss: 0.1704916742224342
    num_agent_steps_sampled: 869652
    num_agent_steps_trained: 869652
    num_steps_sampled: 869652
    num_steps_trained: 869652
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,87,26455.3,869652,2.68106,12.89,-1.8,96.4519


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 879648
  custom_metrics: {}
  date: 2021-11-14_22-04-47
  done: false
  episode_len_mean: 98.93069306930693
  episode_media: {}
  episode_reward_max: 12.990000000000014
  episode_reward_mean: 2.2758415841584223
  episode_reward_min: -1.850000000000001
  episodes_this_iter: 101
  episodes_total: 9111
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5768600191825475
          entropy_coeff: 0.01
          kl: 0.01751139285605878
          policy_loss: -0.0619710557990604
          total_loss: 0.11460953226758756
          vf_explained_var: 0.8482249975204468
          vf_loss: 0.15746940347708316
    num_agent_steps_sampled: 879648
    num_agent_steps_trained: 879648
    num_steps_sampled: 879648
    num_steps_trained: 879648
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,88,26749.7,879648,2.27584,12.99,-1.85,98.9307




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 889644
  custom_metrics: {}
  date: 2021-11-14_22-10-46
  done: false
  episode_len_mean: 96.24038461538461
  episode_media: {}
  episode_reward_max: 12.54000000000002
  episode_reward_mean: 2.455961538461545
  episode_reward_min: -1.800000000000001
  episodes_this_iter: 104
  episodes_total: 9215
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.564348385680435
          entropy_coeff: 0.01
          kl: 0.01576101692503799
          policy_loss: -0.07024000239872939
          total_loss: 0.07655023761813999
          vf_explained_var: 0.8696361184120178
          vf_loss: 0.13203996256328163
    num_agent_steps_sampled: 889644
    num_agent_steps_trained: 889644
    num_steps_sampled: 889644
    num_steps_trained: 889644
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,89,27108.6,889644,2.45596,12.54,-1.8,96.2404




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 899640
  custom_metrics: {}
  date: 2021-11-14_22-16-08
  done: false
  episode_len_mean: 98.67
  episode_media: {}
  episode_reward_max: 10.480000000000018
  episode_reward_mean: 2.4623000000000075
  episode_reward_min: -1.8400000000000007
  episodes_this_iter: 100
  episodes_total: 9315
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5845782755277096
          entropy_coeff: 0.01
          kl: 0.01680043074732781
          policy_loss: -0.06690006933123287
          total_loss: 0.10320671206563074
          vf_explained_var: 0.8594282865524292
          vf_loss: 0.152894898943014
    num_agent_steps_sampled: 899640
    num_agent_steps_trained: 899640
    num_steps_sampled: 899640
    num_steps_trained: 899640
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,90,27430.8,899640,2.4623,10.48,-1.84,98.67




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 909636
  custom_metrics: {}
  date: 2021-11-14_22-21-14
  done: false
  episode_len_mean: 98.42718446601941
  episode_media: {}
  episode_reward_max: 11.860000000000024
  episode_reward_mean: 2.015339805825249
  episode_reward_min: -2.579999999999995
  episodes_this_iter: 103
  episodes_total: 9418
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5957183332524747
          entropy_coeff: 0.01
          kl: 0.015413261247941698
          policy_loss: -0.0689446361353382
          total_loss: 0.08624128452183791
          vf_explained_var: 0.8533629179000854
          vf_loss: 0.14164060089761057
    num_agent_steps_sampled: 909636
    num_agent_steps_trained: 909636
    num_steps_sampled: 909636
    num_steps_trained: 909636
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,91,27736.8,909636,2.01534,11.86,-2.58,98.4272


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 919632
  custom_metrics: {}
  date: 2021-11-14_22-26-10
  done: false
  episode_len_mean: 98.9009900990099
  episode_media: {}
  episode_reward_max: 10.960000000000015
  episode_reward_mean: 2.2839603960396104
  episode_reward_min: -2.04
  episodes_this_iter: 101
  episodes_total: 9519
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.584851366841895
          entropy_coeff: 0.01
          kl: 0.01635847197314503
          policy_loss: -0.06598936820514181
          total_loss: 0.08791989831285726
          vf_explained_var: 0.8579880595207214
          vf_loss: 0.13783280605044312
    num_agent_steps_sampled: 919632
    num_agent_steps_trained: 919632
    num_steps_sampled: 919632
    num_steps_trained: 919632
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,92,28032.8,919632,2.28396,10.96,-2.04,98.901




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 929628
  custom_metrics: {}
  date: 2021-11-14_22-31-22
  done: false
  episode_len_mean: 97.01941747572816
  episode_media: {}
  episode_reward_max: 12.24000000000002
  episode_reward_mean: 2.09796116504855
  episode_reward_min: -1.8100000000000007
  episodes_this_iter: 103
  episodes_total: 9622
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.595285268013294
          entropy_coeff: 0.01
          kl: 0.015971026507441834
          policy_loss: -0.07065385999834627
          total_loss: 0.09907090146787083
          vf_explained_var: 0.8052142858505249
          vf_loss: 0.15474562108612214
    num_agent_steps_sampled: 929628
    num_agent_steps_trained: 929628
    num_steps_sampled: 929628
    num_steps_trained: 929628
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,93,28344.4,929628,2.09796,12.24,-1.81,97.0194




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 939624
  custom_metrics: {}
  date: 2021-11-14_22-37-03
  done: false
  episode_len_mean: 96.66990291262135
  episode_media: {}
  episode_reward_max: 8.490000000000009
  episode_reward_mean: 2.248155339805831
  episode_reward_min: -2.3799999999999994
  episodes_this_iter: 103
  episodes_total: 9725
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6164353105756972
          entropy_coeff: 0.01
          kl: 0.01700826016875829
          policy_loss: -0.06726497000513168
          total_loss: 0.09522544300167733
          vf_explained_var: 0.8142836093902588
          vf_loss: 0.14506445647392455
    num_agent_steps_sampled: 939624
    num_agent_steps_trained: 939624
    num_steps_sampled: 939624
    num_steps_trained: 939624
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,94,28686.3,939624,2.24816,8.49,-2.38,96.6699


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 949620
  custom_metrics: {}
  date: 2021-11-14_22-42-00
  done: false
  episode_len_mean: 98.4059405940594
  episode_media: {}
  episode_reward_max: 8.420000000000016
  episode_reward_mean: 2.3959405940594123
  episode_reward_min: -1.7100000000000009
  episodes_this_iter: 101
  episodes_total: 9826
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5810883753320093
          entropy_coeff: 0.01
          kl: 0.016738624263761392
          policy_loss: -0.0703035324278614
          total_loss: 0.08957273646409059
          vf_explained_var: 0.8462727069854736
          vf_loss: 0.14278789040131065
    num_agent_steps_sampled: 949620
    num_agent_steps_trained: 949620
    num_steps_sampled: 949620
    num_steps_trained: 949620
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,95,28983.3,949620,2.39594,8.42,-1.71,98.4059




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 959616
  custom_metrics: {}
  date: 2021-11-14_22-47-09
  done: false
  episode_len_mean: 98.04901960784314
  episode_media: {}
  episode_reward_max: 10.460000000000017
  episode_reward_mean: 2.27176470588236
  episode_reward_min: -1.8900000000000008
  episodes_this_iter: 102
  episodes_total: 9928
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5890017088661845
          entropy_coeff: 0.01
          kl: 0.016004719514519133
          policy_loss: -0.07137798365269206
          total_loss: 0.08606006706742426
          vf_explained_var: 0.8346642851829529
          vf_loss: 0.14230972247150464
    num_agent_steps_sampled: 959616
    num_agent_steps_trained: 959616
    num_steps_sampled: 959616
    num_steps_trained: 959616
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,96,29291.8,959616,2.27176,10.46,-1.89,98.049




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 969612
  custom_metrics: {}
  date: 2021-11-14_22-52-18
  done: false
  episode_len_mean: 99.54
  episode_media: {}
  episode_reward_max: 12.470000000000018
  episode_reward_mean: 2.559600000000007
  episode_reward_min: -2.129999999999998
  episodes_this_iter: 100
  episodes_total: 10028
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5797832780414156
          entropy_coeff: 0.01
          kl: 0.01751043694433014
          policy_loss: -0.06453033997080265
          total_loss: 0.1088467560437882
          vf_explained_var: 0.8536818623542786
          vf_loss: 0.1542975958205887
    num_agent_steps_sampled: 969612
    num_agent_steps_trained: 969612
    num_steps_sampled: 969612
    num_steps_trained: 969612
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,97,29601,969612,2.5596,12.47,-2.13,99.54




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 979608
  custom_metrics: {}
  date: 2021-11-14_22-57-28
  done: false
  episode_len_mean: 98.45098039215686
  episode_media: {}
  episode_reward_max: 12.440000000000015
  episode_reward_mean: 2.524019607843144
  episode_reward_min: -2.1000000000000005
  episodes_this_iter: 102
  episodes_total: 10130
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.596993138545599
          entropy_coeff: 0.01
          kl: 0.017426948336009603
          policy_loss: -0.06627438539304795
          total_loss: 0.11601057740915408
          vf_explained_var: 0.8504003286361694
          vf_loss: 0.16359153271485596
    num_agent_steps_sampled: 979608
    num_agent_steps_trained: 979608
    num_steps_sampled: 979608
    num_steps_trained: 979608
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,98,29911,979608,2.52402,12.44,-2.1,98.451


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 989604
  custom_metrics: {}
  date: 2021-11-14_23-02-25
  done: false
  episode_len_mean: 99.25490196078431
  episode_media: {}
  episode_reward_max: 22.899999999999935
  episode_reward_mean: 2.3698039215686326
  episode_reward_min: -2.060000000000001
  episodes_this_iter: 102
  episodes_total: 10232
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6018177914823224
          entropy_coeff: 0.01
          kl: 0.018115026934015843
          policy_loss: -0.06303712310006794
          total_loss: 0.11480996523422594
          vf_explained_var: 0.8472806215286255
          vf_loss: 0.15743843523634232
    num_agent_steps_sampled: 989604
    num_agent_steps_trained: 989604
    num_steps_sampled: 989604
    num_steps_trained: 989604
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,99,30207.6,989604,2.3698,22.9,-2.06,99.2549




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 999600
  custom_metrics: {}
  date: 2021-11-14_23-07-46
  done: false
  episode_len_mean: 97.15841584158416
  episode_media: {}
  episode_reward_max: 8.53000000000001
  episode_reward_mean: 2.259009900990105
  episode_reward_min: -2.439999999999996
  episodes_this_iter: 101
  episodes_total: 10333
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6016622639109945
          entropy_coeff: 0.01
          kl: 0.015700346235859957
          policy_loss: -0.06839663633662793
          total_loss: 0.08322221180503694
          vf_explained_var: 0.8491207957267761
          vf_loss: 0.13739720135569
    num_agent_steps_sampled: 999600
    num_agent_steps_trained: 999600
    num_steps_sampled: 999600
    num_steps_trained: 999600
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,100,30528.7,999600,2.25901,8.53,-2.44,97.1584




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1009596
  custom_metrics: {}
  date: 2021-11-14_23-12-55
  done: false
  episode_len_mean: 98.59223300970874
  episode_media: {}
  episode_reward_max: 16.64999999999999
  episode_reward_mean: 2.520679611650492
  episode_reward_min: -2.080000000000001
  episodes_this_iter: 103
  episodes_total: 10436
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.59229332769019
          entropy_coeff: 0.01
          kl: 0.016310050364533174
          policy_loss: -0.07001993228307264
          total_loss: 0.09715427386287886
          vf_explained_var: 0.851718008518219
          vf_loss: 0.15129626535046367
    num_agent_steps_sampled: 1009596
    num_agent_steps_trained: 1009596
    num_steps_sampled: 1009596
    num_steps_trained: 1009596
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,101,30837.7,1009596,2.52068,16.65,-2.08,98.5922


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1019592
  custom_metrics: {}
  date: 2021-11-14_23-17-50
  done: false
  episode_len_mean: 99.23
  episode_media: {}
  episode_reward_max: 10.530000000000015
  episode_reward_mean: 2.7269000000000068
  episode_reward_min: -2.259999999999998
  episodes_this_iter: 100
  episodes_total: 10536
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.580098910922678
          entropy_coeff: 0.01
          kl: 0.0172789748065105
          policy_loss: -0.06434996946133736
          total_loss: 0.10918093767240007
          vf_explained_var: 0.8457563519477844
          vf_loss: 0.15504777417279397
    num_agent_steps_sampled: 1019592
    num_agent_steps_trained: 1019592
    num_steps_sampled: 1019592
    num_steps_trained: 1019592
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,102,31132.5,1019592,2.7269,10.53,-2.26,99.23




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1029588
  custom_metrics: {}
  date: 2021-11-14_23-22-58
  done: false
  episode_len_mean: 97.9126213592233
  episode_media: {}
  episode_reward_max: 16.399999999999935
  episode_reward_mean: 2.994757281553404
  episode_reward_min: -1.570000000000001
  episodes_this_iter: 103
  episodes_total: 10639
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5662286679968873
          entropy_coeff: 0.01
          kl: 0.017435912234151963
          policy_loss: -0.065328445756394
          total_loss: 0.11622880914160966
          vf_explained_var: 0.8582075834274292
          vf_loss: 0.162533206340626
    num_agent_steps_sampled: 1029588
    num_agent_steps_trained: 1029588
    num_steps_sampled: 1029588
    num_steps_trained: 1029588
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,103,31440.1,1029588,2.99476,16.4,-1.57,97.9126




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1039584
  custom_metrics: {}
  date: 2021-11-14_23-28-11
  done: false
  episode_len_mean: 95.60194174757281
  episode_media: {}
  episode_reward_max: 10.820000000000018
  episode_reward_mean: 2.4818446601941804
  episode_reward_min: -2.2200000000000006
  episodes_this_iter: 103
  episodes_total: 10742
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.595364976336813
          entropy_coeff: 0.01
          kl: 0.01574503400244446
          policy_loss: -0.06996877217888195
          total_loss: 0.07672035796806598
          vf_explained_var: 0.8612208962440491
          vf_loss: 0.1322899812613574
    num_agent_steps_sampled: 1039584
    num_agent_steps_trained: 1039584
    num_steps_sampled: 1039584
    num_steps_trained: 1039584
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,104,31753.7,1039584,2.48184,10.82,-2.22,95.6019




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1049580
  custom_metrics: {}
  date: 2021-11-14_23-33-20
  done: false
  episode_len_mean: 98.64077669902913
  episode_media: {}
  episode_reward_max: 12.710000000000015
  episode_reward_mean: 2.5818446601941814
  episode_reward_min: -2.3599999999999985
  episodes_this_iter: 103
  episodes_total: 10845
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5815414428710937
          entropy_coeff: 0.01
          kl: 0.018894168293586942
          policy_loss: -0.061319693681807855
          total_loss: 0.14209294374076983
          vf_explained_var: 0.8343311548233032
          vf_loss: 0.1808043662056677
    num_agent_steps_sampled: 1049580
    num_agent_steps_trained: 1049580
    num_steps_sampled: 1049580
    num_steps_trained: 1049580
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,105,32062.3,1049580,2.58184,12.71,-2.36,98.6408


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1059576
  custom_metrics: {}
  date: 2021-11-14_23-38-15
  done: false
  episode_len_mean: 98.37623762376238
  episode_media: {}
  episode_reward_max: 14.720000000000015
  episode_reward_mean: 2.5181188118811937
  episode_reward_min: -1.960000000000001
  episodes_this_iter: 101
  episodes_total: 10946
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5777914026863553
          entropy_coeff: 0.01
          kl: 0.017749215225211536
          policy_loss: -0.06398864379232255
          total_loss: 0.1085162653380798
          vf_explained_var: 0.8308896422386169
          vf_loss: 0.1527935275561216
    num_agent_steps_sampled: 1059576
    num_agent_steps_trained: 1059576
    num_steps_sampled: 1059576
    num_steps_trained: 1059576
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,106,32357.7,1059576,2.51812,14.72,-1.96,98.3762




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1069572
  custom_metrics: {}
  date: 2021-11-14_23-43-38
  done: false
  episode_len_mean: 100.32
  episode_media: {}
  episode_reward_max: 12.470000000000018
  episode_reward_mean: 2.5002000000000066
  episode_reward_min: -2.3099999999999987
  episodes_this_iter: 100
  episodes_total: 11046
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5866337866864653
          entropy_coeff: 0.01
          kl: 0.016359674381960412
          policy_loss: -0.06813534756000034
          total_loss: 0.08062377471763355
          vf_explained_var: 0.8361040949821472
          vf_loss: 0.132697405701137
    num_agent_steps_sampled: 1069572
    num_agent_steps_trained: 1069572
    num_steps_sampled: 1069572
    num_steps_trained: 1069572
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,107,32680,1069572,2.5002,12.47,-2.31,100.32




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1079568
  custom_metrics: {}
  date: 2021-11-14_23-49-04
  done: false
  episode_len_mean: 95.58653846153847
  episode_media: {}
  episode_reward_max: 10.180000000000021
  episode_reward_mean: 2.069711538461544
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 104
  episodes_total: 11150
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.589951076161148
          entropy_coeff: 0.01
          kl: 0.0170610805577582
          policy_loss: -0.06434753411807694
          total_loss: 0.09668753942809044
          vf_explained_var: 0.8596383333206177
          vf_loss: 0.14320890220582613
    num_agent_steps_sampled: 1079568
    num_agent_steps_trained: 1079568
    num_steps_sampled: 1079568
    num_steps_trained: 1079568
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,108,33006.2,1079568,2.06971,10.18,-2.05,95.5865


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1089564
  custom_metrics: {}
  date: 2021-11-14_23-54-01
  done: false
  episode_len_mean: 98.34313725490196
  episode_media: {}
  episode_reward_max: 20.849999999999916
  episode_reward_mean: 2.4480392156862805
  episode_reward_min: -1.6800000000000004
  episodes_this_iter: 102
  episodes_total: 11252
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5891721107001997
          entropy_coeff: 0.01
          kl: 0.016502736914578636
          policy_loss: -0.06466595971057366
          total_loss: 0.09377982970048539
          vf_explained_var: 0.8713749051094055
          vf_loss: 0.14204280193035418
    num_agent_steps_sampled: 1089564
    num_agent_steps_trained: 1089564
    num_steps_sampled: 1089564
    num_steps_trained: 1089564
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,109,33303.2,1089564,2.44804,20.85,-1.68,98.3431




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1099560
  custom_metrics: {}
  date: 2021-11-14_23-59-11
  done: false
  episode_len_mean: 98.74
  episode_media: {}
  episode_reward_max: 10.37000000000002
  episode_reward_mean: 2.4583000000000057
  episode_reward_min: -2.319999999999998
  episodes_this_iter: 100
  episodes_total: 11352
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.568427297294649
          entropy_coeff: 0.01
          kl: 0.01621665797669486
          policy_loss: -0.07146694174625426
          total_loss: 0.07634610066978405
          vf_explained_var: 0.8448246717453003
          vf_loss: 0.1319357954419385
    num_agent_steps_sampled: 1099560
    num_agent_steps_trained: 1099560
    num_steps_sampled: 1099560
    num_steps_trained: 1099560
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,110,33612.8,1099560,2.4583,10.37,-2.32,98.74


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1109556
  custom_metrics: {}
  date: 2021-11-15_00-04-08
  done: false
  episode_len_mean: 98.68627450980392
  episode_media: {}
  episode_reward_max: 14.690000000000017
  episode_reward_mean: 2.58803921568628
  episode_reward_min: -1.7000000000000006
  episodes_this_iter: 102
  episodes_total: 11454
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5669390744633143
          entropy_coeff: 0.01
          kl: 0.017160094294662434
          policy_loss: -0.06494401800764613
          total_loss: 0.09666318509082955
          vf_explained_var: 0.8670353889465332
          vf_loss: 0.14329714927679071
    num_agent_steps_sampled: 1109556
    num_agent_steps_trained: 1109556
    num_steps_sampled: 1109556
    num_steps_trained: 1109556
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,111,33909.7,1109556,2.58804,14.69,-1.7,98.6863




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1119552
  custom_metrics: {}
  date: 2021-11-15_00-09-20
  done: false
  episode_len_mean: 97.51456310679612
  episode_media: {}
  episode_reward_max: 14.620000000000017
  episode_reward_mean: 2.0346601941747626
  episode_reward_min: -1.8600000000000008
  episodes_this_iter: 103
  episodes_total: 11557
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5853434305924634
          entropy_coeff: 0.01
          kl: 0.015496173986975744
          policy_loss: -0.06810420788346958
          total_loss: 0.07100668132034504
          vf_explained_var: 0.8438402414321899
          vf_loss: 0.1252493255914977
    num_agent_steps_sampled: 1119552
    num_agent_steps_trained: 1119552
    num_steps_sampled: 1119552
    num_steps_trained: 1119552
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,112,34222.4,1119552,2.03466,14.62,-1.86,97.5146




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1129548
  custom_metrics: {}
  date: 2021-11-15_00-14-46
  done: false
  episode_len_mean: 98.74257425742574
  episode_media: {}
  episode_reward_max: 10.340000000000018
  episode_reward_mean: 2.449900990099016
  episode_reward_min: -1.9400000000000008
  episodes_this_iter: 101
  episodes_total: 11658
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.602197876139584
          entropy_coeff: 0.01
          kl: 0.01824496251301579
          policy_loss: -0.06270764992277847
          total_loss: 0.11886917642062991
          vf_explained_var: 0.8164681792259216
          vf_loss: 0.16083896392598174
    num_agent_steps_sampled: 1129548
    num_agent_steps_trained: 1129548
    num_steps_sampled: 1129548
    num_steps_trained: 1129548
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,113,34548.3,1129548,2.4499,10.34,-1.94,98.7426




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1139544
  custom_metrics: {}
  date: 2021-11-15_00-19-59
  done: false
  episode_len_mean: 98.65346534653466
  episode_media: {}
  episode_reward_max: 10.760000000000012
  episode_reward_mean: 2.641881188118818
  episode_reward_min: -1.840000000000001
  episodes_this_iter: 101
  episodes_total: 11759
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5781134154042626
          entropy_coeff: 0.01
          kl: 0.01680918668494258
          policy_loss: -0.06884122703893063
          total_loss: 0.08934708704821702
          vf_explained_var: 0.8742937445640564
          vf_loss: 0.1408893427373762
    num_agent_steps_sampled: 1139544
    num_agent_steps_trained: 1139544
    num_steps_sampled: 1139544
    num_steps_trained: 1139544
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,114,34861.2,1139544,2.64188,10.76,-1.84,98.6535




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1149540
  custom_metrics: {}
  date: 2021-11-15_00-25-05
  done: false
  episode_len_mean: 99.91
  episode_media: {}
  episode_reward_max: 10.520000000000017
  episode_reward_mean: 2.2596000000000065
  episode_reward_min: -2.11
  episodes_this_iter: 100
  episodes_total: 11859
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5953127427997753
          entropy_coeff: 0.01
          kl: 0.016515480709775706
          policy_loss: -0.0688974975508789
          total_loss: 0.09158797917577127
          vf_explained_var: 0.8850534558296204
          vf_loss: 0.14411123484755173
    num_agent_steps_sampled: 1149540
    num_agent_steps_trained: 1149540
    num_steps_sampled: 1149540
    num_steps_trained: 1149540
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,115,35167.5,1149540,2.2596,10.52,-2.11,99.91




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1159536
  custom_metrics: {}
  date: 2021-11-15_00-30-19
  done: false
  episode_len_mean: 98.58823529411765
  episode_media: {}
  episode_reward_max: 11.030000000000012
  episode_reward_mean: 2.6131372549019676
  episode_reward_min: -2.070000000000001
  episodes_this_iter: 102
  episodes_total: 11961
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.588864352356674
          entropy_coeff: 0.01
          kl: 0.017839263843168036
          policy_loss: -0.06660985738301697
          total_loss: 0.10328382134923123
          vf_explained_var: 0.8519287109375
          vf_loss: 0.15006224084486308
    num_agent_steps_sampled: 1159536
    num_agent_steps_trained: 1159536
    num_steps_sampled: 1159536
    num_steps_trained: 1159536
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,116,35480.9,1159536,2.61314,11.03,-2.07,98.5882




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1169532
  custom_metrics: {}
  date: 2021-11-15_00-35-48
  done: false
  episode_len_mean: 95.86538461538461
  episode_media: {}
  episode_reward_max: 10.880000000000015
  episode_reward_mean: 2.6829807692307757
  episode_reward_min: -1.840000000000001
  episodes_this_iter: 104
  episodes_total: 12065
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.600360066157121
          entropy_coeff: 0.01
          kl: 0.017218391800279637
          policy_loss: -0.06376424290629852
          total_loss: 0.1178300187501133
          vf_explained_var: 0.8292844295501709
          vf_loss: 0.16346900790977553
    num_agent_steps_sampled: 1169532
    num_agent_steps_trained: 1169532
    num_steps_sampled: 1169532
    num_steps_trained: 1169532
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,117,35810.2,1169532,2.68298,10.88,-1.84,95.8654




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1179528
  custom_metrics: {}
  date: 2021-11-15_00-41-01
  done: false
  episode_len_mean: 97.81553398058253
  episode_media: {}
  episode_reward_max: 9.830000000000002
  episode_reward_mean: 2.038737864077676
  episode_reward_min: -1.870000000000001
  episodes_this_iter: 103
  episodes_total: 12168
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6071134020120668
          entropy_coeff: 0.01
          kl: 0.017612580937226337
          policy_loss: -0.06358339602374431
          total_loss: 0.10643092292464441
          vf_explained_var: 0.8637513518333435
          vf_loss: 0.15094633609231592
    num_agent_steps_sampled: 1179528
    num_agent_steps_trained: 1179528
    num_steps_sampled: 1179528
    num_steps_trained: 1179528
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,118,36123.1,1179528,2.03874,9.83,-1.87,97.8155


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1189524
  custom_metrics: {}
  date: 2021-11-15_00-45-56
  done: false
  episode_len_mean: 99.99
  episode_media: {}
  episode_reward_max: 8.760000000000012
  episode_reward_mean: 2.511900000000006
  episode_reward_min: -1.7700000000000007
  episodes_this_iter: 99
  episodes_total: 12267
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.5948652057566193
          entropy_coeff: 0.01
          kl: 0.018633186181617235
          policy_loss: -0.06338343634857183
          total_loss: 0.10997998144589047
          vf_explained_var: 0.8221212029457092
          vf_loss: 0.151557252708122
    num_agent_steps_sampled: 1189524
    num_agent_steps_trained: 1189524
    num_steps_sampled: 1189524
    num_steps_trained: 1189524
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,119,36418,1189524,2.5119,8.76,-1.77,99.99


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1199520
  custom_metrics: {}
  date: 2021-11-15_00-50-52
  done: false
  episode_len_mean: 99.16831683168317
  episode_media: {}
  episode_reward_max: 14.810000000000015
  episode_reward_mean: 2.2886138613861435
  episode_reward_min: -2.0599999999999996
  episodes_this_iter: 101
  episodes_total: 12368
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6052775774246606
          entropy_coeff: 0.01
          kl: 0.016942326375182275
          policy_loss: -0.06299854558215946
          total_loss: 0.09010785841144239
          vf_explained_var: 0.8294541239738464
          vf_loss: 0.1357378523872417
    num_agent_steps_sampled: 1199520
    num_agent_steps_trained: 1199520
    num_steps_sampled: 1199520
    num_steps_trained: 1199520
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,120,36713.7,1199520,2.28861,14.81,-2.06,99.1683




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1209516
  custom_metrics: {}
  date: 2021-11-15_00-56-00
  done: false
  episode_len_mean: 99.48514851485149
  episode_media: {}
  episode_reward_max: 10.960000000000013
  episode_reward_mean: 2.366930693069313
  episode_reward_min: -2.2000000000000006
  episodes_this_iter: 101
  episodes_total: 12469
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6083568137935083
          entropy_coeff: 0.01
          kl: 0.017333055365487092
          policy_loss: -0.06268761141751057
          total_loss: 0.09304099617820456
          vf_explained_var: 0.8385345935821533
          vf_loss: 0.13738945262928484
    num_agent_steps_sampled: 1209516
    num_agent_steps_trained: 1209516
    num_steps_sampled: 1209516
    num_steps_trained: 1209516
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,121,37021.9,1209516,2.36693,10.96,-2.2,99.4851




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1219512
  custom_metrics: {}
  date: 2021-11-15_01-01-10
  done: false
  episode_len_mean: 100.2
  episode_media: {}
  episode_reward_max: 10.550000000000018
  episode_reward_mean: 2.342100000000006
  episode_reward_min: -1.800000000000001
  episodes_this_iter: 99
  episodes_total: 12568
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6124513992896445
          entropy_coeff: 0.01
          kl: 0.017349045517535234
          policy_loss: -0.06020720117669712
          total_loss: 0.10589095451988471
          vf_explained_var: 0.8350613117218018
          vf_loss: 0.14775896508557101
    num_agent_steps_sampled: 1219512
    num_agent_steps_trained: 1219512
    num_steps_sampled: 1219512
    num_steps_trained: 1219512
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,122,37331.4,1219512,2.3421,10.55,-1.8,100.2


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1229508
  custom_metrics: {}
  date: 2021-11-15_01-06-04
  done: false
  episode_len_mean: 99.45
  episode_media: {}
  episode_reward_max: 12.480000000000018
  episode_reward_mean: 2.873200000000007
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 100
  episodes_total: 12668
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.608261782185644
          entropy_coeff: 0.01
          kl: 0.01633254248038739
          policy_loss: -0.06544352780239514
          total_loss: 0.087674129067554
          vf_explained_var: 0.855833888053894
          vf_loss: 0.1373417567095568
    num_agent_steps_sampled: 1229508
    num_agent_steps_trained: 1229508
    num_steps_sampled: 1229508
    num_steps_trained: 1229508
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,123,37625.9,1229508,2.8732,12.48,-2.03,99.45




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1239504
  custom_metrics: {}
  date: 2021-11-15_01-11-18
  done: false
  episode_len_mean: 96.6923076923077
  episode_media: {}
  episode_reward_max: 8.180000000000017
  episode_reward_mean: 1.9314423076923133
  episode_reward_min: -1.840000000000001
  episodes_this_iter: 104
  episodes_total: 12772
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6137132520349615
          entropy_coeff: 0.01
          kl: 0.015967303512150047
          policy_loss: -0.06548812543201205
          total_loss: 0.06981159141804609
          vf_explained_var: 0.8839840888977051
          vf_loss: 0.12051439886976384
    num_agent_steps_sampled: 1239504
    num_agent_steps_trained: 1239504
    num_steps_sampled: 1239504
    num_steps_trained: 1239504
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,124,37939.5,1239504,1.93144,8.18,-1.84,96.6923


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1249500
  custom_metrics: {}
  date: 2021-11-15_01-16-13
  done: false
  episode_len_mean: 98.62745098039215
  episode_media: {}
  episode_reward_max: 8.820000000000014
  episode_reward_mean: 2.3911764705882423
  episode_reward_min: -1.800000000000001
  episodes_this_iter: 102
  episodes_total: 12874
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.604096193904551
          entropy_coeff: 0.01
          kl: 0.017479444609215994
          policy_loss: -0.05859993982932761
          total_loss: 0.09610070897242388
          vf_explained_var: 0.8691852688789368
          vf_loss: 0.13594370762674282
    num_agent_steps_sampled: 1249500
    num_agent_steps_trained: 1249500
    num_steps_sampled: 1249500
    num_steps_trained: 1249500
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,125,38234.5,1249500,2.39118,8.82,-1.8,98.6275




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1259496
  custom_metrics: {}
  date: 2021-11-15_01-21-19
  done: false
  episode_len_mean: 99.42
  episode_media: {}
  episode_reward_max: 10.900000000000018
  episode_reward_mean: 2.451700000000006
  episode_reward_min: -1.8300000000000007
  episodes_this_iter: 100
  episodes_total: 12974
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.616882066033844
          entropy_coeff: 0.01
          kl: 0.016214119921373556
          policy_loss: -0.06749549262368908
          total_loss: 0.08027267845339564
          vf_explained_var: 0.8333847522735596
          vf_loss: 0.13238197697533502
    num_agent_steps_sampled: 1259496
    num_agent_steps_trained: 1259496
    num_steps_sampled: 1259496
    num_steps_trained: 1259496
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,126,38540.4,1259496,2.4517,10.9,-1.83,99.42


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1269492
  custom_metrics: {}
  date: 2021-11-15_01-26-16
  done: false
  episode_len_mean: 98.13725490196079
  episode_media: {}
  episode_reward_max: 10.520000000000012
  episode_reward_mean: 2.244313725490202
  episode_reward_min: -2.0799999999999987
  episodes_this_iter: 102
  episodes_total: 13076
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6129688119276975
          entropy_coeff: 0.01
          kl: 0.017297562879548967
          policy_loss: -0.065381052900647
          total_loss: 0.08939508227352849
          vf_explained_var: 0.8617384433746338
          vf_loss: 0.13657406331597166
    num_agent_steps_sampled: 1269492
    num_agent_steps_trained: 1269492
    num_steps_sampled: 1269492
    num_steps_trained: 1269492
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,127,38837.6,1269492,2.24431,10.52,-2.08,98.1373




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1279488
  custom_metrics: {}
  date: 2021-11-15_01-31-27
  done: false
  episode_len_mean: 98.04901960784314
  episode_media: {}
  episode_reward_max: 12.510000000000018
  episode_reward_mean: 2.4436274509803977
  episode_reward_min: -2.0900000000000007
  episodes_this_iter: 102
  episodes_total: 13178
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6169697508852705
          entropy_coeff: 0.01
          kl: 0.015441692915214857
          policy_loss: -0.06531632920950015
          total_loss: 0.07963179954025162
          vf_explained_var: 0.8769989013671875
          vf_loss: 0.1315424564202977
    num_agent_steps_sampled: 1279488
    num_agent_steps_trained: 1279488
    num_steps_sampled: 1279488
    num_steps_trained: 1279488
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,128,39148.2,1279488,2.44363,12.51,-2.09,98.049




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1289484
  custom_metrics: {}
  date: 2021-11-15_01-36-32
  done: false
  episode_len_mean: 99.75
  episode_media: {}
  episode_reward_max: 8.270000000000017
  episode_reward_mean: 2.0312000000000068
  episode_reward_min: -2.279999999999999
  episodes_this_iter: 100
  episodes_total: 13278
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6304624702176476
          entropy_coeff: 0.01
          kl: 0.016127933100337985
          policy_loss: -0.06181645180202193
          total_loss: 0.09285315143991198
          vf_explained_var: 0.856195867061615
          vf_loss: 0.13964009981196468
    num_agent_steps_sampled: 1289484
    num_agent_steps_trained: 1289484
    num_steps_sampled: 1289484
    num_steps_trained: 1289484
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,129,39453.8,1289484,2.0312,8.27,-2.28,99.75


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1299480
  custom_metrics: {}
  date: 2021-11-15_01-41-31
  done: false
  episode_len_mean: 98.95049504950495
  episode_media: {}
  episode_reward_max: 8.770000000000016
  episode_reward_mean: 2.4188118811881263
  episode_reward_min: -1.960000000000001
  episodes_this_iter: 101
  episodes_total: 13379
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6333629376867895
          entropy_coeff: 0.01
          kl: 0.017739664304850845
          policy_loss: -0.06112696092543948
          total_loss: 0.09738123807100914
          vf_explained_var: 0.8725289106369019
          vf_loss: 0.13937701075138825
    num_agent_steps_sampled: 1299480
    num_agent_steps_trained: 1299480
    num_steps_sampled: 1299480
    num_steps_trained: 1299480
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,130,39753,1299480,2.41881,8.77,-1.96,98.9505




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1309476
  custom_metrics: {}
  date: 2021-11-15_01-46-39
  done: false
  episode_len_mean: 99.57425742574257
  episode_media: {}
  episode_reward_max: 12.570000000000016
  episode_reward_mean: 2.060000000000005
  episode_reward_min: -1.9800000000000009
  episodes_this_iter: 101
  episodes_total: 13480
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6447989916190124
          entropy_coeff: 0.01
          kl: 0.01678037737129894
          policy_loss: -0.0624418323326251
          total_loss: 0.0871041450347019
          vf_explained_var: 0.8526861071586609
          vf_loss: 0.1329876966197362
    num_agent_steps_sampled: 1309476
    num_agent_steps_trained: 1309476
    num_steps_sampled: 1309476
    num_steps_trained: 1309476
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,131,40061,1309476,2.06,12.57,-1.98,99.5743


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1319472
  custom_metrics: {}
  date: 2021-11-15_01-51-34
  done: false
  episode_len_mean: 100.36
  episode_media: {}
  episode_reward_max: 12.430000000000016
  episode_reward_mean: 2.7214000000000085
  episode_reward_min: -1.8700000000000008
  episodes_this_iter: 98
  episodes_total: 13578
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6142716000222754
          entropy_coeff: 0.01
          kl: 0.016493092099638833
          policy_loss: -0.05876524340218076
          total_loss: 0.11436910788751502
          vf_explained_var: 0.8531970977783203
          vf_loss: 0.15700707763759816
    num_agent_steps_sampled: 1319472
    num_agent_steps_trained: 1319472
    num_steps_sampled: 1319472
    num_steps_trained: 1319472
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,132,40355.4,1319472,2.7214,12.43,-1.87,100.36




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1329468
  custom_metrics: {}
  date: 2021-11-15_01-56-41
  done: false
  episode_len_mean: 99.03921568627452
  episode_media: {}
  episode_reward_max: 12.160000000000018
  episode_reward_mean: 2.317549019607849
  episode_reward_min: -2.290000000000001
  episodes_this_iter: 102
  episodes_total: 13680
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6250116020186334
          entropy_coeff: 0.01
          kl: 0.015788259996502352
          policy_loss: -0.06270693168362491
          total_loss: 0.08965255485760032
          vf_explained_var: 0.846373438835144
          vf_loss: 0.1381460197464937
    num_agent_steps_sampled: 1329468
    num_agent_steps_trained: 1329468
    num_steps_sampled: 1329468
    num_steps_trained: 1329468
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,133,40663,1329468,2.31755,12.16,-2.29,99.0392


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1339464
  custom_metrics: {}
  date: 2021-11-15_02-01-35
  done: false
  episode_len_mean: 101.35
  episode_media: {}
  episode_reward_max: 14.31000000000002
  episode_reward_mean: 2.4095000000000066
  episode_reward_min: -1.770000000000001
  episodes_this_iter: 98
  episodes_total: 13778
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.640960621833801
          entropy_coeff: 0.01
          kl: 0.014967099273682559
          policy_loss: -0.06394343674620694
          total_loss: 0.06478916503186537
          vf_explained_var: 0.8625351786613464
          vf_loss: 0.11678317009718117
    num_agent_steps_sampled: 1339464
    num_agent_steps_trained: 1339464
    num_steps_sampled: 1339464
    num_steps_trained: 1339464
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,134,40956.3,1339464,2.4095,14.31,-1.77,101.35




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1349460
  custom_metrics: {}
  date: 2021-11-15_02-06-44
  done: false
  episode_len_mean: 97.35922330097087
  episode_media: {}
  episode_reward_max: 10.08000000000002
  episode_reward_mean: 2.4791262135922394
  episode_reward_min: -2.070000000000001
  episodes_this_iter: 103
  episodes_total: 13881
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6332993226173596
          entropy_coeff: 0.01
          kl: 0.017267062886380462
          policy_loss: -0.06331477536716396
          total_loss: 0.08206274833004826
          vf_explained_var: 0.8622965216636658
          vf_loss: 0.1274569237186836
    num_agent_steps_sampled: 1349460
    num_agent_steps_trained: 1349460
    num_steps_sampled: 1349460
    num_steps_trained: 1349460
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,135,41265.8,1349460,2.47913,10.08,-2.07,97.3592




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1359456
  custom_metrics: {}
  date: 2021-11-15_02-11-51
  done: false
  episode_len_mean: 98.66336633663366
  episode_media: {}
  episode_reward_max: 10.840000000000016
  episode_reward_mean: 2.316237623762382
  episode_reward_min: -1.790000000000001
  episodes_this_iter: 101
  episodes_total: 13982
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6436784936831548
          entropy_coeff: 0.01
          kl: 0.016515959896868646
          policy_loss: -0.05696920186846366
          total_loss: 0.09437332079650308
          vf_explained_var: 0.8419235348701477
          vf_loss: 0.13545070975007983
    num_agent_steps_sampled: 1359456
    num_agent_steps_trained: 1359456
    num_steps_sampled: 1359456
    num_steps_trained: 1359456
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,136,41572.8,1359456,2.31624,10.84,-1.79,98.6634




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1369452
  custom_metrics: {}
  date: 2021-11-15_02-17-14
  done: false
  episode_len_mean: 97.28846153846153
  episode_media: {}
  episode_reward_max: 12.670000000000014
  episode_reward_mean: 2.6391346153846227
  episode_reward_min: -1.8100000000000014
  episodes_this_iter: 104
  episodes_total: 14086
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6433847593446065
          entropy_coeff: 0.01
          kl: 0.015786763480182008
          policy_loss: -0.06069217091505853
          total_loss: 0.0795315880053796
          vf_explained_var: 0.8477365970611572
          vf_loss: 0.12619785946499334
    num_agent_steps_sampled: 1369452
    num_agent_steps_trained: 1369452
    num_steps_sampled: 1369452
    num_steps_trained: 1369452
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,137,41895.6,1369452,2.63913,12.67,-1.81,97.2885




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1379448
  custom_metrics: {}
  date: 2021-11-15_02-22-24
  done: false
  episode_len_mean: 97.68316831683168
  episode_media: {}
  episode_reward_max: 8.850000000000014
  episode_reward_mean: 2.400792079207928
  episode_reward_min: -2.5299999999999967
  episodes_this_iter: 101
  episodes_total: 14187
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.634322826577048
          entropy_coeff: 0.01
          kl: 0.01619134033535278
          policy_loss: -0.06067008054775433
          total_loss: 0.09525462332237353
          vf_explained_var: 0.8336390852928162
          vf_loss: 0.14077129922170414
    num_agent_steps_sampled: 1379448
    num_agent_steps_trained: 1379448
    num_steps_sampled: 1379448
    num_steps_trained: 1379448
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,138,42205.3,1379448,2.40079,8.85,-2.53,97.6832


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1389444
  custom_metrics: {}
  date: 2021-11-15_02-27-19
  done: false
  episode_len_mean: 100.23
  episode_media: {}
  episode_reward_max: 10.330000000000013
  episode_reward_mean: 2.988100000000008
  episode_reward_min: -1.7500000000000009
  episodes_this_iter: 100
  episodes_total: 14287
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6114696829746933
          entropy_coeff: 0.01
          kl: 0.018536299169466693
          policy_loss: -0.05414630868591559
          total_loss: 0.12755010642756062
          vf_explained_var: 0.8577349781990051
          vf_loss: 0.16030460601099408
    num_agent_steps_sampled: 1389444
    num_agent_steps_trained: 1389444
    num_steps_sampled: 1389444
    num_steps_trained: 1389444
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,139,42500.8,1389444,2.9881,10.33,-1.75,100.23




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1399440
  custom_metrics: {}
  date: 2021-11-15_02-32-28
  done: false
  episode_len_mean: 98.5
  episode_media: {}
  episode_reward_max: 10.510000000000016
  episode_reward_mean: 2.2027450980392214
  episode_reward_min: -1.6700000000000006
  episodes_this_iter: 102
  episodes_total: 14389
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.643448496272421
          entropy_coeff: 0.01
          kl: 0.016914277479628165
          policy_loss: -0.06046926287265542
          total_loss: 0.08528999078334269
          vf_explained_var: 0.86700439453125
          vf_loss: 0.12884429648765322
    num_agent_steps_sampled: 1399440
    num_agent_steps_trained: 1399440
    num_steps_sampled: 1399440
    num_steps_trained: 1399440
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,140,42808.8,1399440,2.20275,10.51,-1.67,98.5




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1409436
  custom_metrics: {}
  date: 2021-11-15_02-37-42
  done: false
  episode_len_mean: 98.30392156862744
  episode_media: {}
  episode_reward_max: 8.710000000000013
  episode_reward_mean: 2.3529411764705945
  episode_reward_min: -1.5900000000000005
  episodes_this_iter: 102
  episodes_total: 14491
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.638044575544504
          entropy_coeff: 0.01
          kl: 0.016372209362443865
          policy_loss: -0.060495027879842074
          total_loss: 0.0918713849503547
          vf_explained_var: 0.8591356873512268
          vf_loss: 0.13678667824516375
    num_agent_steps_sampled: 1409436
    num_agent_steps_trained: 1409436
    num_steps_sampled: 1409436
    num_steps_trained: 1409436
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,141,43123.2,1409436,2.35294,8.71,-1.59,98.3039




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1419432
  custom_metrics: {}
  date: 2021-11-15_02-42-54
  done: false
  episode_len_mean: 97.92156862745098
  episode_media: {}
  episode_reward_max: 10.490000000000018
  episode_reward_mean: 2.1190196078431436
  episode_reward_min: -2.0199999999999982
  episodes_this_iter: 102
  episodes_total: 14593
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6488611731773766
          entropy_coeff: 0.01
          kl: 0.014764808897967939
          policy_loss: -0.06362981705599997
          total_loss: 0.07287089674351498
          vf_explained_var: 0.8506547808647156
          vf_loss: 0.1251487356546916
    num_agent_steps_sampled: 1419432
    num_agent_steps_trained: 1419432
    num_steps_sampled: 1419432
    num_steps_trained: 1419432
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,142,43435.2,1419432,2.11902,10.49,-2.02,97.9216




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1429428
  custom_metrics: {}
  date: 2021-11-15_02-48-09
  done: false
  episode_len_mean: 97.42718446601941
  episode_media: {}
  episode_reward_max: 8.570000000000016
  episode_reward_mean: 1.9612621359223352
  episode_reward_min: -1.970000000000001
  episodes_this_iter: 103
  episodes_total: 14696
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6445695796583455
          entropy_coeff: 0.01
          kl: 0.016428527520784306
          policy_loss: -0.05893449045749556
          total_loss: 0.08804027946809163
          vf_explained_var: 0.8202545046806335
          vf_loss: 0.13131594744104988
    num_agent_steps_sampled: 1429428
    num_agent_steps_trained: 1429428
    num_steps_sampled: 1429428
    num_steps_trained: 1429428
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,143,43749.9,1429428,1.96126,8.57,-1.97,97.4272




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1439424
  custom_metrics: {}
  date: 2021-11-15_02-53-22
  done: false
  episode_len_mean: 98.1470588235294
  episode_media: {}
  episode_reward_max: 14.790000000000019
  episode_reward_mean: 2.754117647058831
  episode_reward_min: -1.6600000000000008
  episodes_this_iter: 102
  episodes_total: 14798
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.638786750165825
          entropy_coeff: 0.01
          kl: 0.016345783924156154
          policy_loss: -0.05594563520131394
          total_loss: 0.11456069933632627
          vf_explained_var: 0.8249602913856506
          vf_loss: 0.1550017465546759
    num_agent_steps_sampled: 1439424
    num_agent_steps_trained: 1439424
    num_steps_sampled: 1439424
    num_steps_trained: 1439424
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,144,44063.2,1439424,2.75412,14.79,-1.66,98.1471




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1449420
  custom_metrics: {}
  date: 2021-11-15_02-58-28
  done: false
  episode_len_mean: 98.67326732673267
  episode_media: {}
  episode_reward_max: 10.310000000000016
  episode_reward_mean: 2.604455445544562
  episode_reward_min: -1.9100000000000008
  episodes_this_iter: 101
  episodes_total: 14899
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.644463712741167
          entropy_coeff: 0.01
          kl: 0.016164827612134103
          policy_loss: -0.0600340135793528
          total_loss: 0.09261246295009032
          vf_explained_var: 0.8611262440681458
          vf_loss: 0.13766242982182875
    num_agent_steps_sampled: 1449420
    num_agent_steps_trained: 1449420
    num_steps_sampled: 1449420
    num_steps_trained: 1449420
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,145,44369.4,1449420,2.60446,10.31,-1.91,98.6733


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1459416
  custom_metrics: {}
  date: 2021-11-15_03-03-25
  done: false
  episode_len_mean: 97.06862745098039
  episode_media: {}
  episode_reward_max: 12.390000000000017
  episode_reward_mean: 2.277549019607849
  episode_reward_min: -1.9700000000000009
  episodes_this_iter: 102
  episodes_total: 15001
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6517691931153973
          entropy_coeff: 0.01
          kl: 0.01571332213451934
          policy_loss: -0.06043560990474672
          total_loss: 0.08543580148576034
          vf_explained_var: 0.8269369006156921
          vf_loss: 0.13211757791363912
    num_agent_steps_sampled: 1459416
    num_agent_steps_trained: 1459416
    num_steps_sampled: 1459416
    num_steps_trained: 1459416
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,146,44665.6,1459416,2.27755,12.39,-1.97,97.0686




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1469412
  custom_metrics: {}
  date: 2021-11-15_03-08-35
  done: false
  episode_len_mean: 98.28155339805825
  episode_media: {}
  episode_reward_max: 8.29000000000002
  episode_reward_mean: 2.2136893203883554
  episode_reward_min: -2.0300000000000002
  episodes_this_iter: 103
  episodes_total: 15104
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.666310532989665
          entropy_coeff: 0.01
          kl: 0.015359766896013377
          policy_loss: -0.059973696179879014
          total_loss: 0.07995316905375474
          vf_explained_var: 0.8252037763595581
          vf_loss: 0.1272245686310224
    num_agent_steps_sampled: 1469412
    num_agent_steps_trained: 1469412
    num_steps_sampled: 1469412
    num_steps_trained: 1469412
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,147,44975.8,1469412,2.21369,8.29,-2.03,98.2816




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1479408
  custom_metrics: {}
  date: 2021-11-15_03-13-48
  done: false
  episode_len_mean: 95.18446601941747
  episode_media: {}
  episode_reward_max: 12.45000000000002
  episode_reward_mean: 2.244951456310685
  episode_reward_min: -2.000000000000001
  episodes_this_iter: 103
  episodes_total: 15207
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6478657573716253
          entropy_coeff: 0.01
          kl: 0.01772684030814817
          policy_loss: -0.0555005941245482
          total_loss: 0.09787127552434612
          vf_explained_var: 0.8425770998001099
          vf_loss: 0.1344185762449653
    num_agent_steps_sampled: 1479408
    num_agent_steps_trained: 1479408
    num_steps_sampled: 1479408
    num_steps_trained: 1479408
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,148,45288.9,1479408,2.24495,12.45,-2,95.1845




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1489404
  custom_metrics: {}
  date: 2021-11-15_03-18-58
  done: false
  episode_len_mean: 99.24509803921569
  episode_media: {}
  episode_reward_max: 6.670000000000014
  episode_reward_mean: 1.7242156862745144
  episode_reward_min: -1.9700000000000015
  episodes_this_iter: 102
  episodes_total: 15309
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6680099297792483
          entropy_coeff: 0.01
          kl: 0.01372749997287528
          policy_loss: -0.060835595334617375
          total_loss: 0.06291287905023171
          vf_explained_var: 0.8275153636932373
          vf_loss: 0.115246493765352
    num_agent_steps_sampled: 1489404
    num_agent_steps_trained: 1489404
    num_steps_sampled: 1489404
    num_steps_trained: 1489404
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,149,45598.5,1489404,1.72422,6.67,-1.97,99.2451




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1499400
  custom_metrics: {}
  date: 2021-11-15_03-24-07
  done: false
  episode_len_mean: 98.32352941176471
  episode_media: {}
  episode_reward_max: 11.060000000000015
  episode_reward_mean: 2.3709803921568695
  episode_reward_min: -1.910000000000001
  episodes_this_iter: 102
  episodes_total: 15411
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.647897693540296
          entropy_coeff: 0.01
          kl: 0.017346735539677558
          policy_loss: -0.055328630198302683
          total_loss: 0.10827566676765171
          vf_explained_var: 0.8409123420715332
          vf_loss: 0.14562548926084215
    num_agent_steps_sampled: 1499400
    num_agent_steps_trained: 1499400
    num_steps_sampled: 1499400
    num_steps_trained: 1499400
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,150,45907.4,1499400,2.37098,11.06,-1.91,98.3235




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1509396
  custom_metrics: {}
  date: 2021-11-15_03-29-19
  done: false
  episode_len_mean: 96.62135922330097
  episode_media: {}
  episode_reward_max: 10.910000000000014
  episode_reward_mean: 2.5686407766990365
  episode_reward_min: -1.6600000000000008
  episodes_this_iter: 103
  episodes_total: 15514
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.648266530342591
          entropy_coeff: 0.01
          kl: 0.015940994939286385
          policy_loss: -0.055783658735573484
          total_loss: 0.11634699366747951
          vf_explained_var: 0.8546465039253235
          vf_loss: 0.1577582923297444
    num_agent_steps_sampled: 1509396
    num_agent_steps_trained: 1509396
    num_steps_sampled: 1509396
    num_steps_trained: 1509396
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,151,46220.3,1509396,2.56864,10.91,-1.66,96.6214




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1519392
  custom_metrics: {}
  date: 2021-11-15_03-34-30
  done: false
  episode_len_mean: 98.41584158415841
  episode_media: {}
  episode_reward_max: 12.000000000000021
  episode_reward_mean: 2.2440594059406
  episode_reward_min: -1.9700000000000004
  episodes_this_iter: 101
  episodes_total: 15615
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.666720413346576
          entropy_coeff: 0.01
          kl: 0.015412557655425891
          policy_loss: -0.05664537956094385
          total_loss: 0.08537610845369661
          vf_explained_var: 0.8296871185302734
          vf_loss: 0.1291879940005895
    num_agent_steps_sampled: 1519392
    num_agent_steps_trained: 1519392
    num_steps_sampled: 1519392
    num_steps_trained: 1519392
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,152,46531.1,1519392,2.24406,12,-1.97,98.4158


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1529388
  custom_metrics: {}
  date: 2021-11-15_03-39-27
  done: false
  episode_len_mean: 98.17647058823529
  episode_media: {}
  episode_reward_max: 8.580000000000013
  episode_reward_mean: 2.1350980392156926
  episode_reward_min: -2.489999999999999
  episodes_this_iter: 102
  episodes_total: 15717
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6701439398985642
          entropy_coeff: 0.01
          kl: 0.01582845046979009
          policy_loss: -0.055637894948530525
          total_loss: 0.09180078469614825
          vf_explained_var: 0.8443768620491028
          vf_loss: 0.13357353215145631
    num_agent_steps_sampled: 1529388
    num_agent_steps_trained: 1529388
    num_steps_sampled: 1529388
    num_steps_trained: 1529388
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,153,46828.3,1529388,2.1351,8.58,-2.49,98.1765




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1539384
  custom_metrics: {}
  date: 2021-11-15_03-44-54
  done: false
  episode_len_mean: 95.625
  episode_media: {}
  episode_reward_max: 10.150000000000018
  episode_reward_mean: 2.2299038461538516
  episode_reward_min: -1.7600000000000007
  episodes_this_iter: 104
  episodes_total: 15821
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.67065138725134
          entropy_coeff: 0.01
          kl: 0.016083413886862602
          policy_loss: -0.054729240413946216
          total_loss: 0.10139423697156051
          vf_explained_var: 0.8547482490539551
          vf_loss: 0.1416099614575187
    num_agent_steps_sampled: 1539384
    num_agent_steps_trained: 1539384
    num_steps_sampled: 1539384
    num_steps_trained: 1539384
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,154,47154.7,1539384,2.2299,10.15,-1.76,95.625


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1549380
  custom_metrics: {}
  date: 2021-11-15_03-49-55
  done: false
  episode_len_mean: 97.40384615384616
  episode_media: {}
  episode_reward_max: 10.22000000000002
  episode_reward_mean: 1.9691346153846205
  episode_reward_min: -2.0100000000000007
  episodes_this_iter: 104
  episodes_total: 15925
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6717346327936546
          entropy_coeff: 0.01
          kl: 0.01557255666392614
          policy_loss: -0.052734150897520475
          total_loss: 0.10457998621675513
          vf_explained_var: 0.8180750012397766
          vf_loss: 0.14412072443634144
    num_agent_steps_sampled: 1549380
    num_agent_steps_trained: 1549380
    num_steps_sampled: 1549380
    num_steps_trained: 1549380
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,155,47456,1549380,1.96913,10.22,-2.01,97.4038




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1559376
  custom_metrics: {}
  date: 2021-11-15_03-55-06
  done: false
  episode_len_mean: 96.59223300970874
  episode_media: {}
  episode_reward_max: 10.45000000000002
  episode_reward_mean: 2.406213592233015
  episode_reward_min: -1.7500000000000009
  episodes_this_iter: 103
  episodes_total: 16028
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6633143810125497
          entropy_coeff: 0.01
          kl: 0.017918075306459297
          policy_loss: -0.05015773027259697
          total_loss: 0.10925060703387308
          vf_explained_var: 0.8044214248657227
          vf_loss: 0.1401194142185661
    num_agent_steps_sampled: 1559376
    num_agent_steps_trained: 1559376
    num_steps_sampled: 1559376
    num_steps_trained: 1559376
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,156,47766.3,1559376,2.40621,10.45,-1.75,96.5922




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1569372
  custom_metrics: {}
  date: 2021-11-15_04-00-15
  done: false
  episode_len_mean: 96.33980582524272
  episode_media: {}
  episode_reward_max: 10.690000000000015
  episode_reward_mean: 2.0504854368932097
  episode_reward_min: -1.9500000000000008
  episodes_this_iter: 103
  episodes_total: 16131
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6707994643439594
          entropy_coeff: 0.01
          kl: 0.01524271862977992
          policy_loss: -0.05227227007341372
          total_loss: 0.09828401076344725
          vf_explained_var: 0.8170406222343445
          vf_loss: 0.1381988562977849
    num_agent_steps_sampled: 1569372
    num_agent_steps_trained: 1569372
    num_steps_sampled: 1569372
    num_steps_trained: 1569372
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,157,48075.7,1569372,2.05049,10.69,-1.95,96.3398




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1579368
  custom_metrics: {}
  date: 2021-11-15_04-05-29
  done: false
  episode_len_mean: 95.47169811320755
  episode_media: {}
  episode_reward_max: 8.590000000000018
  episode_reward_mean: 1.9729245283018921
  episode_reward_min: -2.0900000000000007
  episodes_this_iter: 106
  episodes_total: 16237
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6773483406784186
          entropy_coeff: 0.01
          kl: 0.014241977035159316
          policy_loss: -0.053444093183622275
          total_loss: 0.06989882150664925
          vf_explained_var: 0.8371768593788147
          vf_loss: 0.11361576988138895
    num_agent_steps_sampled: 1579368
    num_agent_steps_trained: 1579368
    num_steps_sampled: 1579368
    num_steps_trained: 1579368
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,158,48389.9,1579368,1.97292,8.59,-2.09,95.4717




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1589364
  custom_metrics: {}
  date: 2021-11-15_04-10-39
  done: false
  episode_len_mean: 98.53465346534654
  episode_media: {}
  episode_reward_max: 10.190000000000017
  episode_reward_mean: 1.7317821782178269
  episode_reward_min: -2.339999999999998
  episodes_this_iter: 101
  episodes_total: 16338
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6777695166759004
          entropy_coeff: 0.01
          kl: 0.017859044554798063
          policy_loss: -0.04990654591606277
          total_loss: 0.11539947394536347
          vf_explained_var: 0.8255412578582764
          vf_loss: 0.14631293802084322
    num_agent_steps_sampled: 1589364
    num_agent_steps_trained: 1589364
    num_steps_sampled: 1589364
    num_steps_trained: 1589364
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,159,48699.2,1589364,1.73178,10.19,-2.34,98.5347




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1599360
  custom_metrics: {}
  date: 2021-11-15_04-15-56
  done: false
  episode_len_mean: 94.79047619047618
  episode_media: {}
  episode_reward_max: 10.92000000000002
  episode_reward_mean: 2.528000000000006
  episode_reward_min: -1.8100000000000012
  episodes_this_iter: 105
  episodes_total: 16443
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6633424263734082
          entropy_coeff: 0.01
          kl: 0.017729444599963368
          policy_loss: -0.04914713422210616
          total_loss: 0.11045354447649935
          vf_explained_var: 0.8626063466072083
          vf_loss: 0.14079547602977827
    num_agent_steps_sampled: 1599360
    num_agent_steps_trained: 1599360
    num_steps_sampled: 1599360
    num_steps_trained: 1599360
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,160,49016.3,1599360,2.528,10.92,-1.81,94.7905




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1609356
  custom_metrics: {}
  date: 2021-11-15_04-21-10
  done: false
  episode_len_mean: 96.28571428571429
  episode_media: {}
  episode_reward_max: 9.100000000000016
  episode_reward_mean: 2.614476190476197
  episode_reward_min: -1.970000000000001
  episodes_this_iter: 105
  episodes_total: 16548
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.657772961946634
          entropy_coeff: 0.01
          kl: 0.01745099114832472
          policy_loss: -0.0448894127110879
          total_loss: 0.15774467844977721
          vf_explained_var: 0.8207930326461792
          vf_loss: 0.18448684034813354
    num_agent_steps_sampled: 1609356
    num_agent_steps_trained: 1609356
    num_steps_sampled: 1609356
    num_steps_trained: 1609356
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,161,49330.2,1609356,2.61448,9.1,-1.97,96.2857


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1619352
  custom_metrics: {}
  date: 2021-11-15_04-26-07
  done: false
  episode_len_mean: 97.9009900990099
  episode_media: {}
  episode_reward_max: 8.110000000000017
  episode_reward_mean: 1.8228712871287183
  episode_reward_min: -2.1399999999999983
  episodes_this_iter: 101
  episodes_total: 16649
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6843418926255316
          entropy_coeff: 0.01
          kl: 0.01522420576215756
          policy_loss: -0.047472270148304796
          total_loss: 0.11091215377633706
          vf_explained_var: 0.8391638994216919
          vf_loss: 0.146209868723447
    num_agent_steps_sampled: 1619352
    num_agent_steps_trained: 1619352
    num_steps_sampled: 1619352
    num_steps_trained: 1619352
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,162,49627.1,1619352,1.82287,8.11,-2.14,97.901




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1629348
  custom_metrics: {}
  date: 2021-11-15_04-31-18
  done: false
  episode_len_mean: 97.2621359223301
  episode_media: {}
  episode_reward_max: 10.630000000000015
  episode_reward_mean: 2.6081553398058324
  episode_reward_min: -1.760000000000001
  episodes_this_iter: 103
  episodes_total: 16752
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6700237523796213
          entropy_coeff: 0.01
          kl: 0.015479973900780273
          policy_loss: -0.05023588055474126
          total_loss: 0.09365462246072344
          vf_explained_var: 0.8695287704467773
          vf_loss: 0.13091726150188562
    num_agent_steps_sampled: 1629348
    num_agent_steps_trained: 1629348
    num_steps_sampled: 1629348
    num_steps_trained: 1629348
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,163,49938.5,1629348,2.60816,10.63,-1.76,97.2621


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1639344
  custom_metrics: {}
  date: 2021-11-15_04-36-15
  done: false
  episode_len_mean: 97.33333333333333
  episode_media: {}
  episode_reward_max: 8.930000000000017
  episode_reward_mean: 1.6498039215686322
  episode_reward_min: -2.5099999999999945
  episodes_this_iter: 102
  episodes_total: 16854
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6852055146143985
          entropy_coeff: 0.01
          kl: 0.014552617793940303
          policy_loss: -0.046362864419531365
          total_loss: 0.08904364732149829
          vf_explained_var: 0.839133083820343
          vf_loss: 0.12496179953559787
    num_agent_steps_sampled: 1639344
    num_agent_steps_trained: 1639344
    num_steps_sampled: 1639344
    num_steps_trained: 1639344
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,164,50235.5,1639344,1.6498,8.93,-2.51,97.3333




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1649340
  custom_metrics: {}
  date: 2021-11-15_04-41-29
  done: false
  episode_len_mean: 96.96153846153847
  episode_media: {}
  episode_reward_max: 8.970000000000008
  episode_reward_mean: 2.240769230769237
  episode_reward_min: -1.6300000000000008
  episodes_this_iter: 104
  episodes_total: 16958
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.677355087109101
          entropy_coeff: 0.01
          kl: 0.01393336193583568
          policy_loss: -0.04818287414546387
          total_loss: 0.0969455013015809
          vf_explained_var: 0.8405930995941162
          vf_loss: 0.13619224504472163
    num_agent_steps_sampled: 1649340
    num_agent_steps_trained: 1649340
    num_steps_sampled: 1649340
    num_steps_trained: 1649340
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,165,50549.2,1649340,2.24077,8.97,-1.63,96.9615




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1659336
  custom_metrics: {}
  date: 2021-11-15_04-46-43
  done: false
  episode_len_mean: 93.0
  episode_media: {}
  episode_reward_max: 12.120000000000019
  episode_reward_mean: 2.0096261682243037
  episode_reward_min: -2.0599999999999996
  episodes_this_iter: 107
  episodes_total: 17065
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.692927923263648
          entropy_coeff: 0.01
          kl: 0.013970216131683371
          policy_loss: -0.05015668228117383
          total_loss: 0.08016497890831123
          vf_explained_var: 0.8394721150398254
          vf_loss: 0.12144680491879455
    num_agent_steps_sampled: 1659336
    num_agent_steps_trained: 1659336
    num_steps_sampled: 1659336
    num_steps_trained: 1659336
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,166,50863.8,1659336,2.00963,12.12,-2.06,93


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1669332
  custom_metrics: {}
  date: 2021-11-15_04-51-42
  done: false
  episode_len_mean: 96.32692307692308
  episode_media: {}
  episode_reward_max: 8.700000000000017
  episode_reward_mean: 1.6870192307692358
  episode_reward_min: -1.7700000000000007
  episodes_this_iter: 104
  episodes_total: 17169
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.7002117205888796
          entropy_coeff: 0.01
          kl: 0.013043768726890373
          policy_loss: -0.0516253618706559
          total_loss: 0.06443025214780664
          vf_explained_var: 0.8406623005867004
          vf_loss: 0.10962797958348106
    num_agent_steps_sampled: 1669332
    num_agent_steps_trained: 1669332
    num_steps_sampled: 1669332
    num_steps_trained: 1669332
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,167,51162.5,1669332,1.68702,8.7,-1.77,96.3269




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1679328
  custom_metrics: {}
  date: 2021-11-15_04-57-10
  done: false
  episode_len_mean: 95.54285714285714
  episode_media: {}
  episode_reward_max: 8.660000000000018
  episode_reward_mean: 2.233619047619053
  episode_reward_min: -1.950000000000001
  episodes_this_iter: 105
  episodes_total: 17274
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.69010919882701
          entropy_coeff: 0.01
          kl: 0.01475636461622783
          policy_loss: -0.04733016489423875
          total_loss: 0.0941407330457567
          vf_explained_var: 0.8566058874130249
          vf_loss: 0.13055304229959974
    num_agent_steps_sampled: 1679328
    num_agent_steps_trained: 1679328
    num_steps_sampled: 1679328
    num_steps_trained: 1679328
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,168,51490.6,1679328,2.23362,8.66,-1.95,95.5429




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1689324
  custom_metrics: {}
  date: 2021-11-15_05-02-26
  done: false
  episode_len_mean: 95.66346153846153
  episode_media: {}
  episode_reward_max: 10.740000000000014
  episode_reward_mean: 1.96288461538462
  episode_reward_min: -1.6700000000000006
  episodes_this_iter: 104
  episodes_total: 17378
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6884362156574544
          entropy_coeff: 0.01
          kl: 0.01529029107160133
          policy_loss: -0.042989018317471206
          total_loss: 0.13781777173400117
          vf_explained_var: 0.817865788936615
          vf_loss: 0.16850380930317263
    num_agent_steps_sampled: 1689324
    num_agent_steps_trained: 1689324
    num_steps_sampled: 1689324
    num_steps_trained: 1689324
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,169,51806.2,1689324,1.96288,10.74,-1.67,95.6635




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1699320
  custom_metrics: {}
  date: 2021-11-15_05-07-40
  done: false
  episode_len_mean: 96.51428571428572
  episode_media: {}
  episode_reward_max: 8.650000000000011
  episode_reward_mean: 1.659428571428576
  episode_reward_min: -1.9400000000000008
  episodes_this_iter: 105
  episodes_total: 17483
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.7087362022481414
          entropy_coeff: 0.01
          kl: 0.014448347540158852
          policy_loss: -0.0458786404480696
          total_loss: 0.10175247565449144
          vf_explained_var: 0.8332725167274475
          vf_loss: 0.13768894455602598
    num_agent_steps_sampled: 1699320
    num_agent_steps_trained: 1699320
    num_steps_sampled: 1699320
    num_steps_trained: 1699320
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,170,52120.6,1699320,1.65943,8.65,-1.94,96.5143


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1709316
  custom_metrics: {}
  date: 2021-11-15_05-12-39
  done: false
  episode_len_mean: 96.24271844660194
  episode_media: {}
  episode_reward_max: 8.850000000000017
  episode_reward_mean: 2.2468932038835012
  episode_reward_min: -2.04
  episodes_this_iter: 103
  episodes_total: 17586
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6883023442366185
          entropy_coeff: 0.01
          kl: 0.014398109378077879
          policy_loss: -0.048450497007713866
          total_loss: 0.09671726363400618
          vf_explained_var: 0.8388465046882629
          vf_loss: 0.13515000524612256
    num_agent_steps_sampled: 1709316
    num_agent_steps_trained: 1709316
    num_steps_sampled: 1709316
    num_steps_trained: 1709316
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,171,52419.6,1709316,2.24689,8.85,-2.04,96.2427




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1719312
  custom_metrics: {}
  date: 2021-11-15_05-17-53
  done: false
  episode_len_mean: 94.38095238095238
  episode_media: {}
  episode_reward_max: 9.030000000000015
  episode_reward_mean: 2.0555238095238146
  episode_reward_min: -1.4100000000000008
  episodes_this_iter: 105
  episodes_total: 17691
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.695082200694288
          entropy_coeff: 0.01
          kl: 0.015528293074602406
          policy_loss: -0.04446330753823694
          total_loss: 0.12314544930958594
          vf_explained_var: 0.8283409476280212
          vf_loss: 0.15476226411823535
    num_agent_steps_sampled: 1719312
    num_agent_steps_trained: 1719312
    num_steps_sampled: 1719312
    num_steps_trained: 1719312
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,172,52732.7,1719312,2.05552,9.03,-1.41,94.381




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1729308
  custom_metrics: {}
  date: 2021-11-15_05-23-06
  done: false
  episode_len_mean: 96.25961538461539
  episode_media: {}
  episode_reward_max: 8.650000000000015
  episode_reward_mean: 1.6908653846153887
  episode_reward_min: -2.1599999999999997
  episodes_this_iter: 104
  episodes_total: 17795
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.698863282876137
          entropy_coeff: 0.01
          kl: 0.013916835159242087
          policy_loss: -0.04633315913060791
          total_loss: 0.1036670283239303
          vf_explained_var: 0.8012557625770569
          vf_loss: 0.14132149475626649
    num_agent_steps_sampled: 1729308
    num_agent_steps_trained: 1729308
    num_steps_sampled: 1729308
    num_steps_trained: 1729308
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,173,53046.4,1729308,1.69087,8.65,-2.16,96.2596


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1739304
  custom_metrics: {}
  date: 2021-11-15_05-28-08
  done: false
  episode_len_mean: 95.47619047619048
  episode_media: {}
  episode_reward_max: 10.380000000000017
  episode_reward_mean: 1.649809523809528
  episode_reward_min: -2.0300000000000002
  episodes_this_iter: 105
  episodes_total: 17900
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6998901651455807
          entropy_coeff: 0.01
          kl: 0.013408248960417616
          policy_loss: -0.048684746974235416
          total_loss: 0.08589662567067605
          vf_explained_var: 0.8152785301208496
          vf_loss: 0.1272163987860211
    num_agent_steps_sampled: 1739304
    num_agent_steps_trained: 1739304
    num_steps_sampled: 1739304
    num_steps_trained: 1739304
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,174,53348,1739304,1.64981,10.38,-2.03,95.4762




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1749300
  custom_metrics: {}
  date: 2021-11-15_05-33-24
  done: false
  episode_len_mean: 94.34285714285714
  episode_media: {}
  episode_reward_max: 8.920000000000016
  episode_reward_mean: 1.837523809523814
  episode_reward_min: -1.570000000000001
  episodes_this_iter: 105
  episodes_total: 18005
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6987675847151342
          entropy_coeff: 0.01
          kl: 0.015444938364259815
          policy_loss: -0.04428247567664227
          total_loss: 0.1233947399072349
          vf_explained_var: 0.809515118598938
          vf_loss: 0.15508120480932805
    num_agent_steps_sampled: 1749300
    num_agent_steps_trained: 1749300
    num_steps_sampled: 1749300
    num_steps_trained: 1749300
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,175,53663.7,1749300,1.83752,8.92,-1.57,94.3429




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1759296
  custom_metrics: {}
  date: 2021-11-15_05-38-38
  done: false
  episode_len_mean: 94.60747663551402
  episode_media: {}
  episode_reward_max: 11.330000000000013
  episode_reward_mean: 2.169532710280379
  episode_reward_min: -1.7400000000000007
  episodes_this_iter: 107
  episodes_total: 18112
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.7002600885863997
          entropy_coeff: 0.01
          kl: 0.01491095682244499
          policy_loss: -0.040073827732927524
          total_loss: 0.13428883974392636
          vf_explained_var: 0.8052354454994202
          vf_loss: 0.16315011813337157
    num_agent_steps_sampled: 1759296
    num_agent_steps_trained: 1759296
    num_steps_sampled: 1759296
    num_steps_trained: 1759296
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,176,53978.2,1759296,2.16953,11.33,-1.74,94.6075


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1769292
  custom_metrics: {}
  date: 2021-11-15_05-43-36
  done: false
  episode_len_mean: 96.64077669902913
  episode_media: {}
  episode_reward_max: 6.77000000000001
  episode_reward_mean: 1.6683495145631109
  episode_reward_min: -1.790000000000001
  episodes_this_iter: 103
  episodes_total: 18215
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.7033377252073367
          entropy_coeff: 0.01
          kl: 0.013221105870651633
          policy_loss: -0.040351420835965976
          total_loss: 0.1155135274769213
          vf_explained_var: 0.7823896408081055
          vf_loss: 0.1490140785693995
    num_agent_steps_sampled: 1769292
    num_agent_steps_trained: 1769292
    num_steps_sampled: 1769292
    num_steps_trained: 1769292
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,177,54276.5,1769292,1.66835,6.77,-1.79,96.6408




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1779288
  custom_metrics: {}
  date: 2021-11-15_05-48-53
  done: false
  episode_len_mean: 93.59813084112149
  episode_media: {}
  episode_reward_max: 8.570000000000014
  episode_reward_mean: 1.75009345794393
  episode_reward_min: -2.319999999999997
  episodes_this_iter: 107
  episodes_total: 18322
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.706608591426132
          entropy_coeff: 0.01
          kl: 0.013595742087265588
          policy_loss: -0.03970168535995623
          total_loss: 0.12692261211281547
          vf_explained_var: 0.807422935962677
          vf_loss: 0.15884598447925324
    num_agent_steps_sampled: 1779288
    num_agent_steps_trained: 1779288
    num_steps_sampled: 1779288
    num_steps_trained: 1779288
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,178,54592.5,1779288,1.75009,8.57,-2.32,93.5981




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1789284
  custom_metrics: {}
  date: 2021-11-15_05-54-03
  done: false
  episode_len_mean: 94.99056603773585
  episode_media: {}
  episode_reward_max: 8.300000000000013
  episode_reward_mean: 1.5838679245283052
  episode_reward_min: -1.9500000000000008
  episodes_this_iter: 106
  episodes_total: 18428
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.7005785909473388
          entropy_coeff: 0.01
          kl: 0.013681009916752626
          policy_loss: -0.041206150600670746
          total_loss: 0.11093749763627146
          vf_explained_var: 0.8049436211585999
          vf_loss: 0.14408650341618837
    num_agent_steps_sampled: 1789284
    num_agent_steps_trained: 1789284
    num_steps_sampled: 1789284
    num_steps_trained: 1789284
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,179,54903.5,1789284,1.58387,8.3,-1.95,94.9906




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1799280
  custom_metrics: {}
  date: 2021-11-15_05-59-16
  done: false
  episode_len_mean: 94.0754716981132
  episode_media: {}
  episode_reward_max: 8.400000000000018
  episode_reward_mean: 1.9339622641509475
  episode_reward_min: -1.7400000000000007
  episodes_this_iter: 106
  episodes_total: 18534
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.695444443388882
          entropy_coeff: 0.01
          kl: 0.014041867148565963
          policy_loss: -0.0378744044349107
          total_loss: 0.11135744919371592
          vf_explained_var: 0.8235668540000916
          vf_loss: 0.14019852890991247
    num_agent_steps_sampled: 1799280
    num_agent_steps_trained: 1799280
    num_steps_sampled: 1799280
    num_steps_trained: 1799280
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,180,55215.6,1799280,1.93396,8.4,-1.74,94.0755


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1809276
  custom_metrics: {}
  date: 2021-11-15_06-04-18
  done: false
  episode_len_mean: 94.0952380952381
  episode_media: {}
  episode_reward_max: 6.950000000000015
  episode_reward_mean: 1.7828571428571476
  episode_reward_min: -2.2600000000000007
  episodes_this_iter: 105
  episodes_total: 18639
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6931817199429897
          entropy_coeff: 0.01
          kl: 0.01274234332706189
          policy_loss: -0.04178977719683232
          total_loss: 0.09510139453774079
          vf_explained_var: 0.8547006249427795
          vf_loss: 0.13116575771600453
    num_agent_steps_sampled: 1809276
    num_agent_steps_trained: 1809276
    num_steps_sampled: 1809276
    num_steps_trained: 1809276
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,181,55517.8,1809276,1.78286,6.95,-2.26,94.0952




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1819272
  custom_metrics: {}
  date: 2021-11-15_06-09-48
  done: false
  episode_len_mean: 93.6822429906542
  episode_media: {}
  episode_reward_max: 10.310000000000013
  episode_reward_mean: 2.621214953271033
  episode_reward_min: -1.7600000000000007
  episodes_this_iter: 107
  episodes_total: 18746
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6713385768425773
          entropy_coeff: 0.01
          kl: 0.014727337304752473
          policy_loss: -0.03590088413041244
          total_loss: 0.15604573961100582
          vf_explained_var: 0.8130940794944763
          vf_loss: 0.18091545610362267
    num_agent_steps_sampled: 1819272
    num_agent_steps_trained: 1819272
    num_steps_sampled: 1819272
    num_steps_trained: 1819272
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,182,55847.6,1819272,2.62121,10.31,-1.76,93.6822




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1829268
  custom_metrics: {}
  date: 2021-11-15_06-15-04
  done: false
  episode_len_mean: 94.4245283018868
  episode_media: {}
  episode_reward_max: 10.640000000000015
  episode_reward_mean: 1.7454716981132121
  episode_reward_min: -1.6900000000000008
  episodes_this_iter: 106
  episodes_total: 18852
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.692539406230307
          entropy_coeff: 0.01
          kl: 0.012356959826877423
          policy_loss: -0.04352002210485247
          total_loss: 0.11615642853017547
          vf_explained_var: 0.7982430458068848
          vf_loss: 0.15493230847124423
    num_agent_steps_sampled: 1829268
    num_agent_steps_trained: 1829268
    num_steps_sampled: 1829268
    num_steps_trained: 1829268
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,183,56163.6,1829268,1.74547,10.64,-1.69,94.4245


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1839264
  custom_metrics: {}
  date: 2021-11-15_06-20-06
  done: false
  episode_len_mean: 94.93333333333334
  episode_media: {}
  episode_reward_max: 12.910000000000009
  episode_reward_mean: 1.6014285714285756
  episode_reward_min: -2.0599999999999996
  episodes_this_iter: 105
  episodes_total: 18957
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.689838569286542
          entropy_coeff: 0.01
          kl: 0.015354047813346912
          policy_loss: -0.03607029224602649
          total_loss: 0.14311708914728946
          vf_explained_var: 0.7880058884620667
          vf_loss: 0.1667350224378463
    num_agent_steps_sampled: 1839264
    num_agent_steps_trained: 1839264
    num_steps_sampled: 1839264
    num_steps_trained: 1839264
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,184,56466,1839264,1.60143,12.91,-2.06,94.9333




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1849260
  custom_metrics: {}
  date: 2021-11-15_06-25-22
  done: false
  episode_len_mean: 92.52777777777777
  episode_media: {}
  episode_reward_max: 8.300000000000017
  episode_reward_mean: 1.804537037037041
  episode_reward_min: -1.6600000000000006
  episodes_this_iter: 108
  episodes_total: 19065
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6941362138487337
          entropy_coeff: 0.01
          kl: 0.012016901239338563
          policy_loss: -0.04065592518299181
          total_loss: 0.10194232124898933
          vf_explained_var: 0.8336895108222961
          vf_loss: 0.13874160482978018
    num_agent_steps_sampled: 1849260
    num_agent_steps_trained: 1849260
    num_steps_sampled: 1849260
    num_steps_trained: 1849260
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,185,56782,1849260,1.80454,8.3,-1.66,92.5278




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1859256
  custom_metrics: {}
  date: 2021-11-15_06-30-51
  done: false
  episode_len_mean: 91.76146788990826
  episode_media: {}
  episode_reward_max: 8.780000000000006
  episode_reward_mean: 1.8811009174311966
  episode_reward_min: -1.9200000000000004
  episodes_this_iter: 109
  episodes_total: 19174
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.688537443601168
          entropy_coeff: 0.01
          kl: 0.013226124049630136
          policy_loss: -0.03551212432038071
          total_loss: 0.12978859172027526
          vf_explained_var: 0.7720949053764343
          vf_loss: 0.1582889812353712
    num_agent_steps_sampled: 1859256
    num_agent_steps_trained: 1859256
    num_steps_sampled: 1859256
    num_steps_trained: 1859256
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,186,57110.9,1859256,1.8811,8.78,-1.92,91.7615


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1869252
  custom_metrics: {}
  date: 2021-11-15_06-35-54
  done: false
  episode_len_mean: 93.70093457943925
  episode_media: {}
  episode_reward_max: 10.950000000000012
  episode_reward_mean: 2.208037383177575
  episode_reward_min: -1.760000000000001
  episodes_this_iter: 107
  episodes_total: 19281
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6844311699908006
          entropy_coeff: 0.01
          kl: 0.014823811167750351
          policy_loss: -0.037211848439600986
          total_loss: 0.13583153590289318
          vf_explained_var: 0.8330093026161194
          vf_loss: 0.1618958898477702
    num_agent_steps_sampled: 1869252
    num_agent_steps_trained: 1869252
    num_steps_sampled: 1869252
    num_steps_trained: 1869252
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,187,57413.3,1869252,2.20804,10.95,-1.76,93.7009


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1879248
  custom_metrics: {}
  date: 2021-11-15_06-40-55
  done: false
  episode_len_mean: 95.23809523809524
  episode_media: {}
  episode_reward_max: 10.980000000000016
  episode_reward_mean: 2.007333333333338
  episode_reward_min: -2.180000000000001
  episodes_this_iter: 105
  episodes_total: 19386
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6941974736686447
          entropy_coeff: 0.01
          kl: 0.013086967745562747
          policy_loss: -0.03164814702776444
          total_loss: 0.1515402190335509
          vf_explained_var: 0.7987884283065796
          vf_loss: 0.17658987454791417
    num_agent_steps_sampled: 1879248
    num_agent_steps_trained: 1879248
    num_steps_sampled: 1879248
    num_steps_trained: 1879248
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,188,57715.1,1879248,2.00733,10.98,-2.18,95.2381




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1889244
  custom_metrics: {}
  date: 2021-11-15_06-46-23
  done: false
  episode_len_mean: 95.27619047619048
  episode_media: {}
  episode_reward_max: 12.590000000000014
  episode_reward_mean: 1.8961904761904806
  episode_reward_min: -1.830000000000001
  episodes_this_iter: 105
  episodes_total: 19491
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6853720917660966
          entropy_coeff: 0.01
          kl: 0.011618124777670135
          policy_loss: -0.0385454811135896
          total_loss: 0.12098815345213326
          vf_explained_var: 0.8002918362617493
          vf_loss: 0.15661137333143757
    num_agent_steps_sampled: 1889244
    num_agent_steps_trained: 1889244
    num_steps_sampled: 1889244
    num_steps_trained: 1889244
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,189,58042.5,1889244,1.89619,12.59,-1.83,95.2762




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1899240
  custom_metrics: {}
  date: 2021-11-15_06-51-37
  done: false
  episode_len_mean: 92.46296296296296
  episode_media: {}
  episode_reward_max: 8.890000000000015
  episode_reward_mean: 1.7837962962963005
  episode_reward_min: -2.219999999999998
  episodes_this_iter: 108
  episodes_total: 19599
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.680754722081698
          entropy_coeff: 0.01
          kl: 0.013854994343077008
          policy_loss: -0.035047712926076266
          total_loss: 0.14078102607486975
          vf_explained_var: 0.8092446327209473
          vf_loss: 0.1671274517248902
    num_agent_steps_sampled: 1899240
    num_agent_steps_trained: 1899240
    num_steps_sampled: 1899240
    num_steps_trained: 1899240
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,190,58356.9,1899240,1.7838,8.89,-2.22,92.463


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1909236
  custom_metrics: {}
  date: 2021-11-15_06-56-40
  done: false
  episode_len_mean: 93.17757009345794
  episode_media: {}
  episode_reward_max: 10.92000000000001
  episode_reward_mean: 1.9464485981308457
  episode_reward_min: -1.8900000000000015
  episodes_this_iter: 107
  episodes_total: 19706
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6989569941137592
          entropy_coeff: 0.01
          kl: 0.012407846414480587
          policy_loss: -0.03813664168517432
          total_loss: 0.11128183614987976
          vf_explained_var: 0.8136828541755676
          vf_loss: 0.14460809468649902
    num_agent_steps_sampled: 1909236
    num_agent_steps_trained: 1909236
    num_steps_sampled: 1909236
    num_steps_trained: 1909236
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,191,58660,1909236,1.94645,10.92,-1.89,93.1776




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1919232
  custom_metrics: {}
  date: 2021-11-15_07-02-17
  done: false
  episode_len_mean: 90.93636363636364
  episode_media: {}
  episode_reward_max: 8.650000000000013
  episode_reward_mean: 1.818272727272732
  episode_reward_min: -1.9000000000000008
  episodes_this_iter: 110
  episodes_total: 19816
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.68623723250169
          entropy_coeff: 0.01
          kl: 0.01264562553413045
          policy_loss: -0.033829121088656856
          total_loss: 0.1363423188917466
          vf_explained_var: 0.8159373998641968
          vf_loss: 0.16462445688656827
    num_agent_steps_sampled: 1919232
    num_agent_steps_trained: 1919232
    num_steps_sampled: 1919232
    num_steps_trained: 1919232
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,192,58996.5,1919232,1.81827,8.65,-1.9,90.9364




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1929228
  custom_metrics: {}
  date: 2021-11-15_07-07-36
  done: false
  episode_len_mean: 91.73394495412845
  episode_media: {}
  episode_reward_max: 8.700000000000014
  episode_reward_mean: 1.6755045871559666
  episode_reward_min: -1.6900000000000008
  episodes_this_iter: 109
  episodes_total: 19925
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6872639634670357
          entropy_coeff: 0.01
          kl: 0.01121147405519584
          policy_loss: -0.03665609807253648
          total_loss: 0.11934774573693355
          vf_explained_var: 0.8065246343612671
          vf_loss: 0.1541427015951779
    num_agent_steps_sampled: 1929228
    num_agent_steps_trained: 1929228
    num_steps_sampled: 1929228
    num_steps_trained: 1929228
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,193,59315.2,1929228,1.6755,8.7,-1.69,91.7339


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1939224
  custom_metrics: {}
  date: 2021-11-15_07-12-39
  done: false
  episode_len_mean: 92.68807339449542
  episode_media: {}
  episode_reward_max: 9.110000000000015
  episode_reward_mean: 1.7290825688073435
  episode_reward_min: -2.000000000000001
  episodes_this_iter: 109
  episodes_total: 20034
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.686699948962937
          entropy_coeff: 0.01
          kl: 0.012237184712027048
          policy_loss: -0.03380642011284064
          total_loss: 0.15996845939025506
          vf_explained_var: 0.7252193093299866
          vf_loss: 0.1892793134228987
    num_agent_steps_sampled: 1939224
    num_agent_steps_trained: 1939224
    num_steps_sampled: 1939224
    num_steps_trained: 1939224
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,194,59618.4,1939224,1.72908,9.11,-2,92.6881




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1949220
  custom_metrics: {}
  date: 2021-11-15_07-17-56
  done: false
  episode_len_mean: 91.57407407407408
  episode_media: {}
  episode_reward_max: 10.280000000000015
  episode_reward_mean: 2.031944444444449
  episode_reward_min: -1.6100000000000005
  episodes_this_iter: 108
  episodes_total: 20142
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6849928704082457
          entropy_coeff: 0.01
          kl: 0.012358975364353255
          policy_loss: -0.03646064313251971
          total_loss: 0.11847534002059609
          vf_explained_var: 0.8372138142585754
          vf_loss: 0.15011120951439963
    num_agent_steps_sampled: 1949220
    num_agent_steps_trained: 1949220
    num_steps_sampled: 1949220
    num_steps_trained: 1949220
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,195,59935.6,1949220,2.03194,10.28,-1.61,91.5741




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1959216
  custom_metrics: {}
  date: 2021-11-15_07-23-27
  done: false
  episode_len_mean: 91.01818181818182
  episode_media: {}
  episode_reward_max: 10.930000000000012
  episode_reward_mean: 1.7100000000000037
  episode_reward_min: -1.790000000000001
  episodes_this_iter: 110
  episodes_total: 20252
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.68362658217422
          entropy_coeff: 0.01
          kl: 0.011310001333201694
          policy_loss: -0.032173991434944746
          total_loss: 0.15333277905781745
          vf_explained_var: 0.7232828736305237
          vf_loss: 0.18335674109462743
    num_agent_steps_sampled: 1959216
    num_agent_steps_trained: 1959216
    num_steps_sampled: 1959216
    num_steps_trained: 1959216
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,196,60266.3,1959216,1.71,10.93,-1.79,91.0182


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1969212
  custom_metrics: {}
  date: 2021-11-15_07-28-32
  done: false
  episode_len_mean: 91.07272727272728
  episode_media: {}
  episode_reward_max: 10.930000000000014
  episode_reward_mean: 1.8711818181818216
  episode_reward_min: -1.5000000000000007
  episodes_this_iter: 110
  episodes_total: 20362
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.688730204411042
          entropy_coeff: 0.01
          kl: 0.010603491918449948
          policy_loss: -0.033184400875057675
          total_loss: 0.12783170231582167
          vf_explained_var: 0.8144668936729431
          vf_loss: 0.16072781562056934
    num_agent_steps_sampled: 1969212
    num_agent_steps_trained: 1969212
    num_steps_sampled: 1969212
    num_steps_trained: 1969212
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,197,60571.7,1969212,1.87118,10.93,-1.5,91.0727


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1979208
  custom_metrics: {}
  date: 2021-11-15_07-33-37
  done: false
  episode_len_mean: 92.22222222222223
  episode_media: {}
  episode_reward_max: 11.16000000000001
  episode_reward_mean: 1.7817592592592628
  episode_reward_min: -1.5800000000000007
  episodes_this_iter: 108
  episodes_total: 20470
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6866557637850446
          entropy_coeff: 0.01
          kl: 0.012744283765759793
          policy_loss: -0.03179497511293262
          total_loss: 0.16014224726897783
          vf_explained_var: 0.7432499527931213
          vf_loss: 0.18614157455352445
    num_agent_steps_sampled: 1979208
    num_agent_steps_trained: 1979208
    num_steps_sampled: 1979208
    num_steps_trained: 1979208
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,198,60875.8,1979208,1.78176,11.16,-1.58,92.2222




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1989204
  custom_metrics: {}
  date: 2021-11-15_07-38-55
  done: false
  episode_len_mean: 91.46363636363637
  episode_media: {}
  episode_reward_max: 8.610000000000014
  episode_reward_mean: 1.8791818181818218
  episode_reward_min: -1.8200000000000007
  episodes_this_iter: 110
  episodes_total: 20580
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6949737307352897
          entropy_coeff: 0.01
          kl: 0.01065220953917905
          policy_loss: -0.037213485120620546
          total_loss: 0.12918703286375247
          vf_explained_var: 0.8053334355354309
          vf_loss: 0.16604980746379647
    num_agent_steps_sampled: 1989204
    num_agent_steps_trained: 1989204
    num_steps_sampled: 1989204
    num_steps_trained: 1989204
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,199,61193.9,1989204,1.87918,8.61,-1.82,91.4636




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 1999200
  custom_metrics: {}
  date: 2021-11-15_07-44-11
  done: false
  episode_len_mean: 92.19444444444444
  episode_media: {}
  episode_reward_max: 8.740000000000014
  episode_reward_mean: 1.462592592592596
  episode_reward_min: -1.8800000000000008
  episodes_this_iter: 108
  episodes_total: 20688
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6938846096014366
          entropy_coeff: 0.01
          kl: 0.01126054516839455
          policy_loss: -0.036225979080678435
          total_loss: 0.1278529769054447
          vf_explained_var: 0.7838162779808044
          vf_loss: 0.1621582561144685
    num_agent_steps_sampled: 1999200
    num_agent_steps_trained: 1999200
    num_steps_sampled: 1999200
    num_steps_trained: 1999200
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,200,61510.3,1999200,1.46259,8.74,-1.88,92.1944


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2009196
  custom_metrics: {}
  date: 2021-11-15_07-49-15
  done: false
  episode_len_mean: 93.26168224299066
  episode_media: {}
  episode_reward_max: 8.860000000000014
  episode_reward_mean: 2.042990654205612
  episode_reward_min: -1.9800000000000009
  episodes_this_iter: 107
  episodes_total: 20795
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.683831733821804
          entropy_coeff: 0.01
          kl: 0.012558018271804614
          policy_loss: -0.030690000764627613
          total_loss: 0.1701810194326676
          vf_explained_var: 0.7883574962615967
          vf_loss: 0.19552451065367357
    num_agent_steps_sampled: 2009196
    num_agent_steps_trained: 2009196
    num_steps_sampled: 2009196
    num_steps_trained: 2009196
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,201,61813.8,2009196,2.04299,8.86,-1.98,93.2617




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2019192
  custom_metrics: {}
  date: 2021-11-15_07-54-47
  done: false
  episode_len_mean: 91.0909090909091
  episode_media: {}
  episode_reward_max: 8.980000000000013
  episode_reward_mean: 1.8910909090909127
  episode_reward_min: -1.980000000000001
  episodes_this_iter: 110
  episodes_total: 20905
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6775314137466952
          entropy_coeff: 0.01
          kl: 0.011190557916313527
          policy_loss: -0.03020482259308999
          total_loss: 0.1600366015277771
          vf_explained_var: 0.7911524772644043
          vf_loss: 0.18833656359989293
    num_agent_steps_sampled: 2019192
    num_agent_steps_trained: 2019192
    num_steps_sampled: 2019192
    num_steps_trained: 2019192
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,202,62146.7,2019192,1.89109,8.98,-1.98,91.0909




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2029188
  custom_metrics: {}
  date: 2021-11-15_08-00-05
  done: false
  episode_len_mean: 91.69444444444444
  episode_media: {}
  episode_reward_max: 6.870000000000014
  episode_reward_mean: 1.5763888888888917
  episode_reward_min: -1.780000000000001
  episodes_this_iter: 108
  episodes_total: 21013
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6869826446231615
          entropy_coeff: 0.01
          kl: 0.011988438743278611
          policy_loss: -0.03297267098250425
          total_loss: 0.15763678165017347
          vf_explained_var: 0.7558754682540894
          vf_loss: 0.186754223678468
    num_agent_steps_sampled: 2029188
    num_agent_steps_trained: 2029188
    num_steps_sampled: 2029188
    num_steps_trained: 2029188
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,203,62464.3,2029188,1.57639,6.87,-1.78,91.6944


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2039184
  custom_metrics: {}
  date: 2021-11-15_08-05-10
  done: false
  episode_len_mean: 91.98181818181818
  episode_media: {}
  episode_reward_max: 6.750000000000012
  episode_reward_mean: 1.767000000000004
  episode_reward_min: -1.8200000000000007
  episodes_this_iter: 110
  episodes_total: 21123
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6899104084724037
          entropy_coeff: 0.01
          kl: 0.010858990622949816
          policy_loss: -0.034923039622700366
          total_loss: 0.14740730171473937
          vf_explained_var: 0.7613874673843384
          vf_loss: 0.1813990408148712
    num_agent_steps_sampled: 2039184
    num_agent_steps_trained: 2039184
    num_steps_sampled: 2039184
    num_steps_trained: 2039184
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,204,62769.4,2039184,1.767,6.75,-1.82,91.9818




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2049180
  custom_metrics: {}
  date: 2021-11-15_08-10-30
  done: false
  episode_len_mean: 89.86486486486487
  episode_media: {}
  episode_reward_max: 10.510000000000016
  episode_reward_mean: 2.018198198198201
  episode_reward_min: -1.5800000000000007
  episodes_this_iter: 111
  episodes_total: 21234
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.670743899161999
          entropy_coeff: 0.01
          kl: 0.01134497672296804
          policy_loss: -0.0321560760283381
          total_loss: 0.1505273237521959
          vf_explained_var: 0.7856696844100952
          vf_loss: 0.18031490427386174
    num_agent_steps_sampled: 2049180
    num_agent_steps_trained: 2049180
    num_steps_sampled: 2049180
    num_steps_trained: 2049180
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,205,63089.1,2049180,2.0182,10.51,-1.58,89.8649




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2059176
  custom_metrics: {}
  date: 2021-11-15_08-16-03
  done: false
  episode_len_mean: 90.09909909909909
  episode_media: {}
  episode_reward_max: 8.350000000000016
  episode_reward_mean: 1.6234234234234268
  episode_reward_min: -1.790000000000001
  episodes_this_iter: 111
  episodes_total: 21345
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.679066807286352
          entropy_coeff: 0.01
          kl: 0.010835755667733258
          policy_loss: -0.03220146562044437
          total_loss: 0.15344316678113726
          vf_explained_var: 0.7809394598007202
          vf_loss: 0.18466444427752468
    num_agent_steps_sampled: 2059176
    num_agent_steps_trained: 2059176
    num_steps_sampled: 2059176
    num_steps_trained: 2059176
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,206,63422.2,2059176,1.62342,8.35,-1.79,90.0991


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2069172
  custom_metrics: {}
  date: 2021-11-15_08-21-09
  done: false
  episode_len_mean: 90.30909090909091
  episode_media: {}
  episode_reward_max: 10.360000000000014
  episode_reward_mean: 1.5766363636363672
  episode_reward_min: -1.7700000000000007
  episodes_this_iter: 110
  episodes_total: 21455
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.665822522354941
          entropy_coeff: 0.01
          kl: 0.010469083275339025
          policy_loss: -0.031113391953448838
          total_loss: 0.1594943494695183
          vf_explained_var: 0.7438340783119202
          vf_loss: 0.19043485122119896
    num_agent_steps_sampled: 2069172
    num_agent_steps_trained: 2069172
    num_steps_sampled: 2069172
    num_steps_trained: 2069172
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,207,63727.9,2069172,1.57664,10.36,-1.77,90.3091




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2079168
  custom_metrics: {}
  date: 2021-11-15_08-26-27
  done: false
  episode_len_mean: 90.72072072072072
  episode_media: {}
  episode_reward_max: 8.680000000000016
  episode_reward_mean: 2.0318918918918967
  episode_reward_min: -2.0099999999999993
  episodes_this_iter: 111
  episodes_total: 21566
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.66404390752825
          entropy_coeff: 0.01
          kl: 0.011071580410232412
          policy_loss: -0.028759032913332438
          total_loss: 0.18419146534636552
          vf_explained_var: 0.7781946063041687
          vf_loss: 0.21121568868382493
    num_agent_steps_sampled: 2079168
    num_agent_steps_trained: 2079168
    num_steps_sampled: 2079168
    num_steps_trained: 2079168
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,208,64045.9,2079168,2.03189,8.68,-2.01,90.7207




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2089164
  custom_metrics: {}
  date: 2021-11-15_08-32-02
  done: false
  episode_len_mean: 89.53571428571429
  episode_media: {}
  episode_reward_max: 9.19000000000001
  episode_reward_mean: 1.8714285714285743
  episode_reward_min: -1.82
  episodes_this_iter: 112
  episodes_total: 21678
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.667068792000795
          entropy_coeff: 0.01
          kl: 0.011249019172804364
          policy_loss: -0.03122763892587943
          total_loss: 0.161319716884476
          vf_explained_var: 0.8083526492118835
          vf_loss: 0.1903880381343775
    num_agent_steps_sampled: 2089164
    num_agent_steps_trained: 2089164
    num_steps_sampled: 2089164
    num_steps_trained: 2089164
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,209,64381,2089164,1.87143,9.19,-1.82,89.5357


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2099160
  custom_metrics: {}
  date: 2021-11-15_08-37-07
  done: false
  episode_len_mean: 91.64220183486239
  episode_media: {}
  episode_reward_max: 8.610000000000014
  episode_reward_mean: 1.7315596330275265
  episode_reward_min: -1.6200000000000008
  episodes_this_iter: 109
  episodes_total: 21787
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.681338188892756
          entropy_coeff: 0.01
          kl: 0.009622087512669407
          policy_loss: -0.03257231067579526
          total_loss: 0.14422755106710472
          vf_explained_var: 0.7939743399620056
          vf_loss: 0.17895288610488622
    num_agent_steps_sampled: 2099160
    num_agent_steps_trained: 2099160
    num_steps_sampled: 2099160
    num_steps_trained: 2099160
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,210,64685.5,2099160,1.73156,8.61,-1.62,91.6422


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2109156
  custom_metrics: {}
  date: 2021-11-15_08-42-12
  done: false
  episode_len_mean: 92.01851851851852
  episode_media: {}
  episode_reward_max: 9.090000000000009
  episode_reward_mean: 2.017500000000004
  episode_reward_min: -1.970000000000001
  episodes_this_iter: 108
  episodes_total: 21895
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6754159572796943
          entropy_coeff: 0.01
          kl: 0.012907151090432303
          policy_loss: -0.02884280165641481
          total_loss: 0.20249086292739163
          vf_explained_var: 0.7634546756744385
          vf_loss: 0.22500820849449016
    num_agent_steps_sampled: 2109156
    num_agent_steps_trained: 2109156
    num_steps_sampled: 2109156
    num_steps_trained: 2109156
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,211,64990.5,2109156,2.0175,9.09,-1.97,92.0185




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2119152
  custom_metrics: {}
  date: 2021-11-15_08-47-32
  done: false
  episode_len_mean: 90.39090909090909
  episode_media: {}
  episode_reward_max: 8.610000000000012
  episode_reward_mean: 1.9736363636363676
  episode_reward_min: -1.910000000000001
  episodes_this_iter: 110
  episodes_total: 22005
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6541533654571596
          entropy_coeff: 0.01
          kl: 0.011703755685281352
          policy_loss: -0.03079819467284868
          total_loss: 0.16605648581519658
          vf_explained_var: 0.7810791730880737
          vf_loss: 0.19340076966760442
    num_agent_steps_sampled: 2119152
    num_agent_steps_trained: 2119152
    num_steps_sampled: 2119152
    num_steps_trained: 2119152
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,212,65310.5,2119152,1.97364,8.61,-1.91,90.3909




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2129148
  custom_metrics: {}
  date: 2021-11-15_08-52-50
  done: false
  episode_len_mean: 90.67567567567568
  episode_media: {}
  episode_reward_max: 12.520000000000012
  episode_reward_mean: 2.1023423423423466
  episode_reward_min: -1.9900000000000009
  episodes_this_iter: 111
  episodes_total: 22116
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6571500996239164
          entropy_coeff: 0.01
          kl: 0.01022178640866476
          policy_loss: -0.03440222450110138
          total_loss: 0.14834693115419492
          vf_explained_var: 0.8094053864479065
          vf_loss: 0.18312333634712247
    num_agent_steps_sampled: 2129148
    num_agent_steps_trained: 2129148
    num_steps_sampled: 2129148
    num_steps_trained: 2129148
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,213,65629.2,2129148,2.10234,12.52,-1.99,90.6757


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2139144
  custom_metrics: {}
  date: 2021-11-15_08-57-57
  done: false
  episode_len_mean: 91.1559633027523
  episode_media: {}
  episode_reward_max: 10.600000000000014
  episode_reward_mean: 1.4143119266055078
  episode_reward_min: -1.7000000000000006
  episodes_this_iter: 109
  episodes_total: 22225
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.677157161276565
          entropy_coeff: 0.01
          kl: 0.009625145661582804
          policy_loss: -0.031062986020150028
          total_loss: 0.11763859732856607
          vf_explained_var: 0.7552592158317566
          vf_loss: 0.15080495919697942
    num_agent_steps_sampled: 2139144
    num_agent_steps_trained: 2139144
    num_steps_sampled: 2139144
    num_steps_trained: 2139144
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,214,65935.4,2139144,1.41431,10.6,-1.7,91.156




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2149140
  custom_metrics: {}
  date: 2021-11-15_09-03-20
  done: false
  episode_len_mean: 90.51351351351352
  episode_media: {}
  episode_reward_max: 10.850000000000014
  episode_reward_mean: 1.9838738738738777
  episode_reward_min: -1.840000000000001
  episodes_this_iter: 111
  episodes_total: 22336
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6622873172800765
          entropy_coeff: 0.01
          kl: 0.010674104621337673
          policy_loss: -0.033326922965228045
          total_loss: 0.16640277790924551
          vf_explained_var: 0.736659049987793
          vf_loss: 0.19899601120716678
    num_agent_steps_sampled: 2149140
    num_agent_steps_trained: 2149140
    num_steps_sampled: 2149140
    num_steps_trained: 2149140
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,215,66259,2149140,1.98387,10.85,-1.84,90.5135




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2159136
  custom_metrics: {}
  date: 2021-11-15_09-08-39
  done: false
  episode_len_mean: 90.80909090909091
  episode_media: {}
  episode_reward_max: 8.890000000000011
  episode_reward_mean: 1.6804545454545492
  episode_reward_min: -1.7600000000000007
  episodes_this_iter: 110
  episodes_total: 22446
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.669182993713607
          entropy_coeff: 0.01
          kl: 0.010217388200850113
          policy_loss: -0.03358774600455012
          total_loss: 0.1573351514310791
          vf_explained_var: 0.7234303951263428
          vf_loss: 0.19142867979338854
    num_agent_steps_sampled: 2159136
    num_agent_steps_trained: 2159136
    num_steps_sampled: 2159136
    num_steps_trained: 2159136
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,216,66577.8,2159136,1.68045,8.89,-1.76,90.8091


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2169132
  custom_metrics: {}
  date: 2021-11-15_09-13-46
  done: false
  episode_len_mean: 91.54128440366972
  episode_media: {}
  episode_reward_max: 8.60000000000001
  episode_reward_mean: 1.8648623853211048
  episode_reward_min: -1.6700000000000008
  episodes_this_iter: 109
  episodes_total: 22555
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6584719264609182
          entropy_coeff: 0.01
          kl: 0.010453423759049615
          policy_loss: -0.033278805284928055
          total_loss: 0.14977601814266836
          vf_explained_var: 0.8031820058822632
          vf_loss: 0.18284856086660528
    num_agent_steps_sampled: 2169132
    num_agent_steps_trained: 2169132
    num_steps_sampled: 2169132
    num_steps_trained: 2169132
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,217,66884.3,2169132,1.86486,8.6,-1.67,91.5413




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2179128
  custom_metrics: {}
  date: 2021-11-15_09-19-06
  done: false
  episode_len_mean: 90.43243243243244
  episode_media: {}
  episode_reward_max: 12.610000000000014
  episode_reward_mean: 1.8299099099099136
  episode_reward_min: -1.9300000000000008
  episodes_this_iter: 111
  episodes_total: 22666
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6576466464588786
          entropy_coeff: 0.01
          kl: 0.010840674505228192
          policy_loss: -0.034212669142338836
          total_loss: 0.1649359533778177
          vf_explained_var: 0.7498178482055664
          vf_loss: 0.19794162650688146
    num_agent_steps_sampled: 2179128
    num_agent_steps_trained: 2179128
    num_steps_sampled: 2179128
    num_steps_trained: 2179128
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,218,67204.9,2179128,1.82991,12.61,-1.93,90.4324




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2189124
  custom_metrics: {}
  date: 2021-11-15_09-24-41
  done: false
  episode_len_mean: 89.85585585585585
  episode_media: {}
  episode_reward_max: 10.440000000000012
  episode_reward_mean: 2.0928828828828867
  episode_reward_min: -1.800000000000001
  episodes_this_iter: 111
  episodes_total: 22777
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.657847427099179
          entropy_coeff: 0.01
          kl: 0.011662191529346925
          policy_loss: -0.029920019856335706
          total_loss: 0.20628403775218843
          vf_explained_var: 0.7585468888282776
          vf_loss: 0.2328936114270463
    num_agent_steps_sampled: 2189124
    num_agent_steps_trained: 2189124
    num_steps_sampled: 2189124
    num_steps_trained: 2189124
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,219,67539.9,2189124,2.09288,10.44,-1.8,89.8559


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2199120
  custom_metrics: {}
  date: 2021-11-15_09-29-47
  done: false
  episode_len_mean: 91.24545454545455
  episode_media: {}
  episode_reward_max: 8.910000000000013
  episode_reward_mean: 1.4557272727272763
  episode_reward_min: -1.7100000000000006
  episodes_this_iter: 110
  episodes_total: 22887
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6588305961372507
          entropy_coeff: 0.01
          kl: 0.010326471682645921
          policy_loss: -0.03306561791473347
          total_loss: 0.15218492426837865
          vf_explained_var: 0.7473633289337158
          vf_loss: 0.18537323117001445
    num_agent_steps_sampled: 2199120
    num_agent_steps_trained: 2199120
    num_steps_sampled: 2199120
    num_steps_trained: 2199120
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,220,67845.6,2199120,1.45573,8.91,-1.71,91.2455


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2209116
  custom_metrics: {}
  date: 2021-11-15_09-34-55
  done: false
  episode_len_mean: 91.04587155963303
  episode_media: {}
  episode_reward_max: 8.540000000000013
  episode_reward_mean: 1.8864220183486275
  episode_reward_min: -1.780000000000001
  episodes_this_iter: 109
  episodes_total: 22996
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.658036321758205
          entropy_coeff: 0.01
          kl: 0.011318483982633942
          policy_loss: -0.0322260540852355
          total_loss: 0.14876301153204763
          vf_explained_var: 0.8129165768623352
          vf_loss: 0.17856139328219314
    num_agent_steps_sampled: 2209116
    num_agent_steps_trained: 2209116
    num_steps_sampled: 2209116
    num_steps_trained: 2209116
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,221,68154,2209116,1.88642,8.54,-1.78,91.0459




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2219112
  custom_metrics: {}
  date: 2021-11-15_09-40-31
  done: false
  episode_len_mean: 88.12280701754386
  episode_media: {}
  episode_reward_max: 9.080000000000009
  episode_reward_mean: 1.4207017543859675
  episode_reward_min: -1.850000000000001
  episodes_this_iter: 114
  episodes_total: 23110
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6526459677606566
          entropy_coeff: 0.01
          kl: 0.010779408968897548
          policy_loss: -0.03278481591906812
          total_loss: 0.14167192345803492
          vf_explained_var: 0.7944818735122681
          vf_loss: 0.17335675346752644
    num_agent_steps_sampled: 2219112
    num_agent_steps_trained: 2219112
    num_steps_sampled: 2219112
    num_steps_trained: 2219112
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,222,68489.1,2219112,1.4207,9.08,-1.85,88.1228


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2229108
  custom_metrics: {}
  date: 2021-11-15_09-45-38
  done: false
  episode_len_mean: 90.35135135135135
  episode_media: {}
  episode_reward_max: 10.910000000000018
  episode_reward_mean: 1.8961261261261297
  episode_reward_min: -1.699999999999997
  episodes_this_iter: 111
  episodes_total: 23221
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6453425745678762
          entropy_coeff: 0.01
          kl: 0.011695150764755958
          policy_loss: -0.031289364746174754
          total_loss: 0.16005969563395614
          vf_explained_var: 0.7956774234771729
          vf_loss: 0.18782909417158772
    num_agent_steps_sampled: 2229108
    num_agent_steps_trained: 2229108
    num_steps_sampled: 2229108
    num_steps_trained: 2229108
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,223,68796.3,2229108,1.89613,10.91,-1.7,90.3514


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2239104
  custom_metrics: {}
  date: 2021-11-15_09-50-45
  done: false
  episode_len_mean: 90.57272727272728
  episode_media: {}
  episode_reward_max: 7.090000000000009
  episode_reward_mean: 1.653000000000004
  episode_reward_min: -1.9500000000000008
  episodes_this_iter: 110
  episodes_total: 23331
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.653941658826975
          entropy_coeff: 0.01
          kl: 0.011757761919655328
          policy_loss: -0.029566756326259457
          total_loss: 0.18872599160601186
          vf_explained_var: 0.7812633514404297
          vf_loss: 0.21469830710890617
    num_agent_steps_sampled: 2239104
    num_agent_steps_trained: 2239104
    num_steps_sampled: 2239104
    num_steps_trained: 2239104
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,224,69103.8,2239104,1.653,7.09,-1.95,90.5727




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2249100
  custom_metrics: {}
  date: 2021-11-15_09-56-24
  done: false
  episode_len_mean: 88.91964285714286
  episode_media: {}
  episode_reward_max: 12.730000000000015
  episode_reward_mean: 1.7843750000000038
  episode_reward_min: -1.810000000000001
  episodes_this_iter: 112
  episodes_total: 23443
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6540121246606874
          entropy_coeff: 0.01
          kl: 0.011392477553918397
          policy_loss: -0.027510592587387715
          total_loss: 0.18226634993090526
          vf_explained_var: 0.7697514891624451
          vf_loss: 0.2071193908396949
    num_agent_steps_sampled: 2249100
    num_agent_steps_trained: 2249100
    num_steps_sampled: 2249100
    num_steps_trained: 2249100
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,225,69442.4,2249100,1.78438,12.73,-1.81,88.9196


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2259096
  custom_metrics: {}
  date: 2021-11-15_10-01-37
  done: false
  episode_len_mean: 90.38738738738739
  episode_media: {}
  episode_reward_max: 12.860000000000012
  episode_reward_mean: 2.0508108108108147
  episode_reward_min: -1.8500000000000008
  episodes_this_iter: 111
  episodes_total: 23554
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6524330833019354
          entropy_coeff: 0.01
          kl: 0.012439693256184261
          policy_loss: -0.02818745534118806
          total_loss: 0.18902719625486777
          vf_explained_var: 0.7765949368476868
          vf_loss: 0.21185740999495373
    num_agent_steps_sampled: 2259096
    num_agent_steps_trained: 2259096
    num_steps_sampled: 2259096
    num_steps_trained: 2259096
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,226,69754.9,2259096,2.05081,12.86,-1.85,90.3874


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2269092
  custom_metrics: {}
  date: 2021-11-15_10-06-48
  done: false
  episode_len_mean: 89.94594594594595
  episode_media: {}
  episode_reward_max: 10.400000000000013
  episode_reward_mean: 1.679639639639643
  episode_reward_min: -1.9900000000000009
  episodes_this_iter: 111
  episodes_total: 23665
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6518984347327144
          entropy_coeff: 0.01
          kl: 0.012069926304436535
          policy_loss: -0.031060986337450962
          total_loss: 0.1753795258827213
          vf_explained_var: 0.765267550945282
          vf_loss: 0.2020255962291207
    num_agent_steps_sampled: 2269092
    num_agent_steps_trained: 2269092
    num_steps_sampled: 2269092
    num_steps_trained: 2269092
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,227,70065.9,2269092,1.67964,10.4,-1.99,89.9459




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2279088
  custom_metrics: {}
  date: 2021-11-15_10-12-18
  done: false
  episode_len_mean: 89.24778761061947
  episode_media: {}
  episode_reward_max: 10.730000000000011
  episode_reward_mean: 1.8716814159292072
  episode_reward_min: -1.6000000000000008
  episodes_this_iter: 113
  episodes_total: 23778
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.64360246077562
          entropy_coeff: 0.01
          kl: 0.012718352069583041
          policy_loss: -0.028110237033544188
          total_loss: 0.18380273071666942
          vf_explained_var: 0.7981483340263367
          vf_loss: 0.20575324781159432
    num_agent_steps_sampled: 2279088
    num_agent_steps_trained: 2279088
    num_steps_sampled: 2279088
    num_steps_trained: 2279088
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,228,70396.1,2279088,1.87168,10.73,-1.6,89.2478




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2289084
  custom_metrics: {}
  date: 2021-11-15_10-17-46
  done: false
  episode_len_mean: 87.68141592920354
  episode_media: {}
  episode_reward_max: 8.670000000000014
  episode_reward_mean: 1.7923008849557558
  episode_reward_min: -1.4700000000000006
  episodes_this_iter: 113
  episodes_total: 23891
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6460152393732317
          entropy_coeff: 0.01
          kl: 0.011849079046206452
          policy_loss: -0.02902691475310737
          total_loss: 0.14801791804319278
          vf_explained_var: 0.8044949769973755
          vf_loss: 0.17313709217004286
    num_agent_steps_sampled: 2289084
    num_agent_steps_trained: 2289084
    num_steps_sampled: 2289084
    num_steps_trained: 2289084
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,229,70724.8,2289084,1.7923,8.67,-1.47,87.6814


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2299080
  custom_metrics: {}
  date: 2021-11-15_10-23-02
  done: false
  episode_len_mean: 90.35135135135135
  episode_media: {}
  episode_reward_max: 10.720000000000013
  episode_reward_mean: 1.783603603603607
  episode_reward_min: -2.3199999999999976
  episodes_this_iter: 111
  episodes_total: 24002
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6593215064105826
          entropy_coeff: 0.01
          kl: 0.011163198770953524
          policy_loss: -0.031082759091121136
          total_loss: 0.1333415869396562
          vf_explained_var: 0.8143333792686462
          vf_loss: 0.16240750373396864
    num_agent_steps_sampled: 2299080
    num_agent_steps_trained: 2299080
    num_steps_sampled: 2299080
    num_steps_trained: 2299080
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,230,71040.7,2299080,1.7836,10.72,-2.32,90.3514




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2309076
  custom_metrics: {}
  date: 2021-11-15_10-28-32
  done: false
  episode_len_mean: 88.375
  episode_media: {}
  episode_reward_max: 8.96000000000001
  episode_reward_mean: 1.9856250000000035
  episode_reward_min: -1.6800000000000008
  episodes_this_iter: 112
  episodes_total: 24114
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.654501135328896
          entropy_coeff: 0.01
          kl: 0.011341997041112856
          policy_loss: -0.030154584538048278
          total_loss: 0.1340339279612208
          vf_explained_var: 0.8076258301734924
          vf_loss: 0.16166522659194202
    num_agent_steps_sampled: 2309076
    num_agent_steps_trained: 2309076
    num_steps_sampled: 2309076
    num_steps_trained: 2309076
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,231,71370,2309076,1.98563,8.96,-1.68,88.375




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2319072
  custom_metrics: {}
  date: 2021-11-15_10-33-58
  done: false
  episode_len_mean: 88.15789473684211
  episode_media: {}
  episode_reward_max: 11.120000000000012
  episode_reward_mean: 1.8506140350877225
  episode_reward_min: -2.0300000000000002
  episodes_this_iter: 114
  episodes_total: 24228
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6450748051333632
          entropy_coeff: 0.01
          kl: 0.012056982266658654
          policy_loss: -0.028523613469531903
          total_loss: 0.17196409766259801
          vf_explained_var: 0.7530379891395569
          vf_loss: 0.19603773330887542
    num_agent_steps_sampled: 2319072
    num_agent_steps_trained: 2319072
    num_steps_sampled: 2319072
    num_steps_trained: 2319072


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,232,71696,2319072,1.85061,11.12,-2.03,88.1579


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2329068
  custom_metrics: {}
  date: 2021-11-15_10-39-09
  done: false
  episode_len_mean: 89.12389380530973
  episode_media: {}
  episode_reward_max: 11.03000000000001
  episode_reward_mean: 1.6509734513274366
  episode_reward_min: -1.9300000000000013
  episodes_this_iter: 113
  episodes_total: 24341
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6562488531455015
          entropy_coeff: 0.01
          kl: 0.011927809381293784
          policy_loss: -0.0313569827677889
          total_loss: 0.13911167976613611
          vf_explained_var: 0.8020663857460022
          vf_loss: 0.16646148129406138
    num_agent_steps_sampled: 2329068
    num_agent_steps_trained: 2329068
    num_steps_sampled: 2329068
    num_steps_trained: 2329068
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,233,72007,2329068,1.65097,11.03,-1.93,89.1239




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2339064
  custom_metrics: {}
  date: 2021-11-15_10-44-35
  done: false
  episode_len_mean: 88.05309734513274
  episode_media: {}
  episode_reward_max: 14.600000000000012
  episode_reward_mean: 1.905840707964605
  episode_reward_min: -1.7300000000000006
  episodes_this_iter: 113
  episodes_total: 24454
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.649885322089888
          entropy_coeff: 0.01
          kl: 0.012465925538268606
          policy_loss: -0.026896913817279742
          total_loss: 0.17411191137308557
          vf_explained_var: 0.7849892377853394
          vf_loss: 0.19555887477392825
    num_agent_steps_sampled: 2339064
    num_agent_steps_trained: 2339064
    num_steps_sampled: 2339064
    num_steps_trained: 2339064
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,234,72333.6,2339064,1.90584,14.6,-1.73,88.0531




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2349060
  custom_metrics: {}
  date: 2021-11-15_10-50-03
  done: false
  episode_len_mean: 87.38260869565218
  episode_media: {}
  episode_reward_max: 10.380000000000013
  episode_reward_mean: 1.5144347826086988
  episode_reward_min: -1.7900000000000007
  episodes_this_iter: 115
  episodes_total: 24569
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.641405443350474
          entropy_coeff: 0.01
          kl: 0.01012143927738127
          policy_loss: -0.0306899503343062
          total_loss: 0.13446826749743943
          vf_explained_var: 0.7930272817611694
          vf_loss: 0.1656321310830247
    num_agent_steps_sampled: 2349060
    num_agent_steps_trained: 2349060
    num_steps_sampled: 2349060
    num_steps_trained: 2349060
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,235,72661,2349060,1.51443,10.38,-1.79,87.3826


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2359056
  custom_metrics: {}
  date: 2021-11-15_10-55-13
  done: false
  episode_len_mean: 88.20535714285714
  episode_media: {}
  episode_reward_max: 9.00000000000001
  episode_reward_mean: 1.6202678571428604
  episode_reward_min: -1.910000000000001
  episodes_this_iter: 112
  episodes_total: 24681
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6527406451029654
          entropy_coeff: 0.01
          kl: 0.011321802076842406
          policy_loss: -0.029194947487762215
          total_loss: 0.1582622453255945
          vf_explained_var: 0.7944985032081604
          vf_loss: 0.18496805912711553
    num_agent_steps_sampled: 2359056
    num_agent_steps_trained: 2359056
    num_steps_sampled: 2359056
    num_steps_trained: 2359056
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,236,72971.3,2359056,1.62027,9,-1.91,88.2054




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2369052
  custom_metrics: {}
  date: 2021-11-15_11-00-40
  done: false
  episode_len_mean: 87.33043478260869
  episode_media: {}
  episode_reward_max: 10.83000000000001
  episode_reward_mean: 1.955478260869569
  episode_reward_min: -1.9500000000000002
  episodes_this_iter: 115
  episodes_total: 24796
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6506299001538856
          entropy_coeff: 0.01
          kl: 0.011723686809703456
          policy_loss: -0.030580628593253275
          total_loss: 0.1609732437098765
          vf_explained_var: 0.7761337757110596
          vf_loss: 0.1880136440631448
    num_agent_steps_sampled: 2369052
    num_agent_steps_trained: 2369052
    num_steps_sampled: 2369052
    num_steps_trained: 2369052
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,237,73298,2369052,1.95548,10.83,-1.95,87.3304




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2379048
  custom_metrics: {}
  date: 2021-11-15_11-06-19
  done: false
  episode_len_mean: 87.09649122807018
  episode_media: {}
  episode_reward_max: 8.800000000000011
  episode_reward_mean: 1.5634210526315817
  episode_reward_min: -2.000000000000001
  episodes_this_iter: 114
  episodes_total: 24910
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6459532421878262
          entropy_coeff: 0.01
          kl: 0.010747087893226996
          policy_loss: -0.03216171945469119
          total_loss: 0.1273169338456395
          vf_explained_var: 0.7957670092582703
          vf_loss: 0.15839457561214193
    num_agent_steps_sampled: 2379048
    num_agent_steps_trained: 2379048
    num_steps_sampled: 2379048
    num_steps_trained: 2379048
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,238,73636.8,2379048,1.56342,8.8,-2,87.0965


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2389044
  custom_metrics: {}
  date: 2021-11-15_11-11-28
  done: false
  episode_len_mean: 88.62280701754386
  episode_media: {}
  episode_reward_max: 9.010000000000012
  episode_reward_mean: 1.8242982456140389
  episode_reward_min: -2.04
  episodes_this_iter: 114
  episodes_total: 25024
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.631339374057248
          entropy_coeff: 0.01
          kl: 0.011158141139029843
          policy_loss: -0.030898395942874315
          total_loss: 0.15124189208030828
          vf_explained_var: 0.838036298751831
          vf_loss: 0.17985658647659689
    num_agent_steps_sampled: 2389044
    num_agent_steps_trained: 2389044
    num_steps_sampled: 2389044
    num_steps_trained: 2389044
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,239,73946.4,2389044,1.8243,9.01,-2.04,88.6228


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2399040
  custom_metrics: {}
  date: 2021-11-15_11-16-39
  done: false
  episode_len_mean: 88.17699115044248
  episode_media: {}
  episode_reward_max: 8.44000000000001
  episode_reward_mean: 1.8957522123893837
  episode_reward_min: -1.990000000000001
  episodes_this_iter: 113
  episodes_total: 25137
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.62363394621091
          entropy_coeff: 0.01
          kl: 0.011386584432271528
          policy_loss: -0.026031264072904983
          total_loss: 0.16586199912648553
          vf_explained_var: 0.8225060701370239
          vf_loss: 0.18894703201312796
    num_agent_steps_sampled: 2399040
    num_agent_steps_trained: 2399040
    num_steps_sampled: 2399040
    num_steps_trained: 2399040
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,240,74257,2399040,1.89575,8.44,-1.99,88.177




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2409036
  custom_metrics: {}
  date: 2021-11-15_11-22-15
  done: false
  episode_len_mean: 86.47826086956522
  episode_media: {}
  episode_reward_max: 9.08000000000001
  episode_reward_mean: 1.8528695652173945
  episode_reward_min: -1.9500000000000008
  episodes_this_iter: 115
  episodes_total: 25252
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6365440408388774
          entropy_coeff: 0.01
          kl: 0.010559758652818562
          policy_loss: -0.02859733449155863
          total_loss: 0.17420829282914344
          vf_explained_var: 0.7801810503005981
          vf_loss: 0.2021075615929997
    num_agent_steps_sampled: 2409036
    num_agent_steps_trained: 2409036
    num_steps_sampled: 2409036
    num_steps_trained: 2409036
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,241,74593.1,2409036,1.85287,9.08,-1.95,86.4783


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2419032
  custom_metrics: {}
  date: 2021-11-15_11-27-25
  done: false
  episode_len_mean: 88.16666666666667
  episode_media: {}
  episode_reward_max: 10.88000000000001
  episode_reward_mean: 1.7587719298245645
  episode_reward_min: -2.06
  episodes_this_iter: 114
  episodes_total: 25366
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.644091232515808
          entropy_coeff: 0.01
          kl: 0.01064230205139964
          policy_loss: -0.030646702191099908
          total_loss: 0.1430841091940673
          vf_explained_var: 0.7973304986953735
          vf_loss: 0.17289666968445555
    num_agent_steps_sampled: 2419032
    num_agent_steps_trained: 2419032
    num_steps_sampled: 2419032
    num_steps_trained: 2419032
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,242,74902.8,2419032,1.75877,10.88,-2.06,88.1667


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2429028
  custom_metrics: {}
  date: 2021-11-15_11-32-37
  done: false
  episode_len_mean: 87.53097345132744
  episode_media: {}
  episode_reward_max: 8.930000000000007
  episode_reward_mean: 1.733451327433631
  episode_reward_min: -1.7000000000000008
  episodes_this_iter: 113
  episodes_total: 25479
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.634741476050809
          entropy_coeff: 0.01
          kl: 0.00989874946575064
          policy_loss: -0.029474147369400558
          total_loss: 0.12774666992541497
          vf_explained_var: 0.8079569339752197
          vf_loss: 0.15819882053412243
    num_agent_steps_sampled: 2429028
    num_agent_steps_trained: 2429028
    num_steps_sampled: 2429028
    num_steps_trained: 2429028
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,243,75214.5,2429028,1.73345,8.93,-1.7,87.531




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2439024
  custom_metrics: {}
  date: 2021-11-15_11-38-16
  done: false
  episode_len_mean: 85.63247863247864
  episode_media: {}
  episode_reward_max: 9.050000000000011
  episode_reward_mean: 1.4703418803418828
  episode_reward_min: -1.820000000000001
  episodes_this_iter: 117
  episodes_total: 25596
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.639008881189884
          entropy_coeff: 0.01
          kl: 0.010725069779660881
          policy_loss: -0.02907874949165007
          total_loss: 0.16571167130222242
          vf_explained_var: 0.7844575047492981
          vf_loss: 0.19369332873954986
    num_agent_steps_sampled: 2439024
    num_agent_steps_trained: 2439024
    num_steps_sampled: 2439024
    num_steps_trained: 2439024
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,244,75553.9,2439024,1.47034,9.05,-1.82,85.6325


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2449020
  custom_metrics: {}
  date: 2021-11-15_11-43-28
  done: false
  episode_len_mean: 86.02564102564102
  episode_media: {}
  episode_reward_max: 8.85000000000001
  episode_reward_mean: 1.409316239316242
  episode_reward_min: -1.780000000000001
  episodes_this_iter: 117
  episodes_total: 25713
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6178612855764536
          entropy_coeff: 0.01
          kl: 0.012185980742521476
          policy_loss: -0.023282361360919525
          total_loss: 0.1899687079581408
          vf_explained_var: 0.7541118264198303
          vf_loss: 0.208198345064496
    num_agent_steps_sampled: 2449020
    num_agent_steps_trained: 2449020
    num_steps_sampled: 2449020
    num_steps_trained: 2449020
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,245,75865.6,2449020,1.40932,8.85,-1.78,86.0256




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2459016
  custom_metrics: {}
  date: 2021-11-15_11-48-55
  done: false
  episode_len_mean: 86.48275862068965
  episode_media: {}
  episode_reward_max: 8.470000000000013
  episode_reward_mean: 1.6068103448275888
  episode_reward_min: -1.7500000000000007
  episodes_this_iter: 116
  episodes_total: 25829
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6323710578119655
          entropy_coeff: 0.01
          kl: 0.01041801121584602
          policy_loss: -0.03110068201636657
          total_loss: 0.12908310619875407
          vf_explained_var: 0.8181443214416504
          vf_loss: 0.15980727665611885
    num_agent_steps_sampled: 2459016
    num_agent_steps_trained: 2459016
    num_steps_sampled: 2459016
    num_steps_trained: 2459016
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,246,76192.4,2459016,1.60681,8.47,-1.75,86.4828




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2469012
  custom_metrics: {}
  date: 2021-11-15_11-54-21
  done: false
  episode_len_mean: 86.84347826086956
  episode_media: {}
  episode_reward_max: 9.160000000000009
  episode_reward_mean: 1.4323478260869587
  episode_reward_min: -1.580000000000001
  episodes_this_iter: 115
  episodes_total: 25944
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6336217732511016
          entropy_coeff: 0.01
          kl: 0.009846878923423168
          policy_loss: -0.03185855535328643
          total_loss: 0.13054088617259096
          vf_explained_var: 0.7837375998497009
          vf_loss: 0.16349918578998146
    num_agent_steps_sampled: 2469012
    num_agent_steps_trained: 2469012
    num_steps_sampled: 2469012
    num_steps_trained: 2469012
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,247,76518.5,2469012,1.43235,9.16,-1.58,86.8435


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2479008
  custom_metrics: {}
  date: 2021-11-15_11-59-32
  done: false
  episode_len_mean: 87.44736842105263
  episode_media: {}
  episode_reward_max: 10.720000000000011
  episode_reward_mean: 1.4648245614035116
  episode_reward_min: -1.5900000000000007
  episodes_this_iter: 114
  episodes_total: 26058
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6334678581637196
          entropy_coeff: 0.01
          kl: 0.011413207307092485
          policy_loss: -0.026170334077448162
          total_loss: 0.1948005879413273
          vf_explained_var: 0.7343850135803223
          vf_loss: 0.21805479989872656
    num_agent_steps_sampled: 2479008
    num_agent_steps_trained: 2479008
    num_steps_sampled: 2479008
    num_steps_trained: 2479008
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,248,76829.9,2479008,1.46482,10.72,-1.59,87.4474


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2489004
  custom_metrics: {}
  date: 2021-11-15_12-04-43
  done: false
  episode_len_mean: 87.06086956521739
  episode_media: {}
  episode_reward_max: 7.02000000000001
  episode_reward_mean: 1.365130434782611
  episode_reward_min: -1.5100000000000005
  episodes_this_iter: 115
  episodes_total: 26173
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6493920827523256
          entropy_coeff: 0.01
          kl: 0.010128202071693767
          policy_loss: -0.02799018745461845
          total_loss: 0.140984304006307
          vf_explained_var: 0.7668563723564148
          vf_loss: 0.16951093843811724
    num_agent_steps_sampled: 2489004
    num_agent_steps_trained: 2489004
    num_steps_sampled: 2489004
    num_steps_trained: 2489004
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,249,77140.7,2489004,1.36513,7.02,-1.51,87.0609




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2499000
  custom_metrics: {}
  date: 2021-11-15_12-10-23
  done: false
  episode_len_mean: 85.97435897435898
  episode_media: {}
  episode_reward_max: 10.83000000000001
  episode_reward_mean: 1.6005128205128234
  episode_reward_min: -1.4800000000000009
  episodes_this_iter: 117
  episodes_total: 26290
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.637380377859132
          entropy_coeff: 0.01
          kl: 0.011069564432496737
          policy_loss: -0.023733392122607583
          total_loss: 0.15994702677449418
          vf_explained_var: 0.761166512966156
          vf_loss: 0.18168414102262284
    num_agent_steps_sampled: 2499000
    num_agent_steps_trained: 2499000
    num_steps_sampled: 2499000
    num_steps_trained: 2499000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,250,77480.6,2499000,1.60051,10.83,-1.48,85.9744


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2508996
  custom_metrics: {}
  date: 2021-11-15_12-15-37
  done: false
  episode_len_mean: 87.36283185840708
  episode_media: {}
  episode_reward_max: 10.550000000000011
  episode_reward_mean: 1.489734513274339
  episode_reward_min: -1.540000000000001
  episodes_this_iter: 113
  episodes_total: 26403
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.627521690547976
          entropy_coeff: 0.01
          kl: 0.009327148951653545
          policy_loss: -0.027782346076992714
          total_loss: 0.14914790238611975
          vf_explained_var: 0.7686508297920227
          vf_loss: 0.17930100314701214
    num_agent_steps_sampled: 2508996
    num_agent_steps_trained: 2508996
    num_steps_sampled: 2508996
    num_steps_trained: 2508996
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,251,77794.7,2508996,1.48973,10.55,-1.54,87.3628


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2518992
  custom_metrics: {}
  date: 2021-11-15_12-20-53
  done: false
  episode_len_mean: 86.93913043478261
  episode_media: {}
  episode_reward_max: 10.96000000000001
  episode_reward_mean: 1.5873043478260902
  episode_reward_min: -1.7300000000000006
  episodes_this_iter: 115
  episodes_total: 26518
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.621834128942245
          entropy_coeff: 0.01
          kl: 0.009901118448169217
          policy_loss: -0.026490317983950816
          total_loss: 0.18723346169313623
          vf_explained_var: 0.7628703713417053
          vf_loss: 0.21456663743354
    num_agent_steps_sampled: 2518992
    num_agent_steps_trained: 2518992
    num_steps_sampled: 2518992
    num_steps_trained: 2518992
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,252,78110.6,2518992,1.5873,10.96,-1.73,86.9391




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2528988
  custom_metrics: {}
  date: 2021-11-15_12-26-39
  done: false
  episode_len_mean: 84.23529411764706
  episode_media: {}
  episode_reward_max: 6.860000000000014
  episode_reward_mean: 1.3506722689075654
  episode_reward_min: -1.5700000000000007
  episodes_this_iter: 119
  episodes_total: 26637
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6347233444197564
          entropy_coeff: 0.01
          kl: 0.009053479149059454
          policy_loss: -0.026203331493366604
          total_loss: 0.14950369015041515
          vf_explained_var: 0.7477104663848877
          vf_loss: 0.1788511789057595
    num_agent_steps_sampled: 2528988
    num_agent_steps_trained: 2528988
    num_steps_sampled: 2528988
    num_steps_trained: 2528988
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,253,78456.3,2528988,1.35067,6.86,-1.57,84.2353




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2538984
  custom_metrics: {}
  date: 2021-11-15_12-32-05
  done: false
  episode_len_mean: 84.91525423728814
  episode_media: {}
  episode_reward_max: 6.940000000000006
  episode_reward_mean: 1.4878813559322055
  episode_reward_min: -1.5200000000000005
  episodes_this_iter: 118
  episodes_total: 26755
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.638087547131074
          entropy_coeff: 0.01
          kl: 0.009212580554026614
          policy_loss: -0.02643321452674289
          total_loss: 0.15905161811532373
          vf_explained_var: 0.7625764608383179
          vf_loss: 0.18825487218295725
    num_agent_steps_sampled: 2538984
    num_agent_steps_trained: 2538984
    num_steps_sampled: 2538984
    num_steps_trained: 2538984
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,254,78782.2,2538984,1.48788,6.94,-1.52,84.9153


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2548980
  custom_metrics: {}
  date: 2021-11-15_12-37-21
  done: false
  episode_len_mean: 85.73275862068965
  episode_media: {}
  episode_reward_max: 8.76000000000001
  episode_reward_mean: 1.9349137931034515
  episode_reward_min: -1.4900000000000007
  episodes_this_iter: 116
  episodes_total: 26871
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6326382245772924
          entropy_coeff: 0.01
          kl: 0.010428725591933845
          policy_loss: -0.025206835012739667
          total_loss: 0.16893128457073217
          vf_explained_var: 0.7673203945159912
          vf_loss: 0.19373681916114993
    num_agent_steps_sampled: 2548980
    num_agent_steps_trained: 2548980
    num_steps_sampled: 2548980
    num_steps_trained: 2548980
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,255,79098.2,2548980,1.93491,8.76,-1.49,85.7328




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2558976
  custom_metrics: {}
  date: 2021-11-15_12-42-49
  done: false
  episode_len_mean: 86.00862068965517
  episode_media: {}
  episode_reward_max: 8.93000000000001
  episode_reward_mean: 1.1462931034482775
  episode_reward_min: -1.790000000000001
  episodes_this_iter: 116
  episodes_total: 26987
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6483892988954856
          entropy_coeff: 0.01
          kl: 0.008915657832584507
          policy_loss: -0.02468048595688027
          total_loss: 0.17093174945539197
          vf_explained_var: 0.6841185688972473
          vf_loss: 0.1992462740960316
    num_agent_steps_sampled: 2558976
    num_agent_steps_trained: 2558976
    num_steps_sampled: 2558976
    num_steps_trained: 2558976
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,256,79426.8,2558976,1.14629,8.93,-1.79,86.0086




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2568972
  custom_metrics: {}
  date: 2021-11-15_12-48-21
  done: false
  episode_len_mean: 84.60504201680672
  episode_media: {}
  episode_reward_max: 8.680000000000009
  episode_reward_mean: 1.0680672268907583
  episode_reward_min: -1.5400000000000005
  episodes_this_iter: 119
  episodes_total: 27106
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6489769437374213
          entropy_coeff: 0.01
          kl: 0.007736202720376755
          policy_loss: -0.028781280918524433
          total_loss: 0.11685740710634133
          vf_explained_var: 0.7416872382164001
          vf_loss: 0.15230141546672735
    num_agent_steps_sampled: 2568972
    num_agent_steps_trained: 2568972
    num_steps_sampled: 2568972
    num_steps_trained: 2568972
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,257,79758.5,2568972,1.06807,8.68,-1.54,84.605




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2578968
  custom_metrics: {}
  date: 2021-11-15_12-53-57
  done: false
  episode_len_mean: 85.23076923076923
  episode_media: {}
  episode_reward_max: 5.520000000000003
  episode_reward_mean: 1.2157264957264977
  episode_reward_min: -1.6200000000000006
  episodes_this_iter: 117
  episodes_total: 27223
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6387551018315505
          entropy_coeff: 0.01
          kl: 0.008919324733480515
          policy_loss: -0.02840602370814826
          total_loss: 0.14069728072390406
          vf_explained_var: 0.7738164663314819
          vf_loss: 0.1726316014953499
    num_agent_steps_sampled: 2578968
    num_agent_steps_trained: 2578968
    num_steps_sampled: 2578968
    num_steps_trained: 2578968
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,258,80094.1,2578968,1.21573,5.52,-1.62,85.2308




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2588964
  custom_metrics: {}
  date: 2021-11-15_12-59-24
  done: false
  episode_len_mean: 85.82758620689656
  episode_media: {}
  episode_reward_max: 7.080000000000011
  episode_reward_mean: 1.1710344827586228
  episode_reward_min: -1.5400000000000007
  episodes_this_iter: 116
  episodes_total: 27339
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6458326080925443
          entropy_coeff: 0.01
          kl: 0.008836880701137651
          policy_loss: -0.02910631640981405
          total_loss: 0.14049801304419007
          vf_explained_var: 0.7599718570709229
          vf_loss: 0.17341469665392278
    num_agent_steps_sampled: 2588964
    num_agent_steps_trained: 2588964
    num_steps_sampled: 2588964
    num_steps_trained: 2588964
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,259,80420.8,2588964,1.17103,7.08,-1.54,85.8276




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2598960
  custom_metrics: {}
  date: 2021-11-15_13-04-50
  done: false
  episode_len_mean: 85.16101694915254
  episode_media: {}
  episode_reward_max: 7.070000000000009
  episode_reward_mean: 1.1777118644067817
  episode_reward_min: -1.7200000000000006
  episodes_this_iter: 118
  episodes_total: 27457
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.641521811587179
          entropy_coeff: 0.01
          kl: 0.00992242189536037
          policy_loss: -0.027290682152359403
          total_loss: 0.1567142799568291
          vf_explained_var: 0.7501871585845947
          vf_loss: 0.18499009866758975
    num_agent_steps_sampled: 2598960
    num_agent_steps_trained: 2598960
    num_steps_sampled: 2598960
    num_steps_trained: 2598960
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,260,80747.7,2598960,1.17771,7.07,-1.72,85.161




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2608956
  custom_metrics: {}
  date: 2021-11-15_13-10-15
  done: false
  episode_len_mean: 86.86086956521739
  episode_media: {}
  episode_reward_max: 10.870000000000013
  episode_reward_mean: 1.1041739130434802
  episode_reward_min: -1.6300000000000008
  episodes_this_iter: 115
  episodes_total: 27572
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6454858977570495
          entropy_coeff: 0.01
          kl: 0.008561959170400719
          policy_loss: -0.026230009480053162
          total_loss: 0.13000617576643633
          vf_explained_var: 0.744498610496521
          vf_loss: 0.1607476795740967
    num_agent_steps_sampled: 2608956
    num_agent_steps_trained: 2608956
    num_steps_sampled: 2608956
    num_steps_trained: 2608956
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,261,81072.6,2608956,1.10417,10.87,-1.63,86.8609




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2618952
  custom_metrics: {}
  date: 2021-11-15_13-15-40
  done: false
  episode_len_mean: 86.47826086956522
  episode_media: {}
  episode_reward_max: 10.790000000000015
  episode_reward_mean: 1.399739130434785
  episode_reward_min: -1.890000000000001
  episodes_this_iter: 115
  episodes_total: 27687
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.640854958183745
          entropy_coeff: 0.01
          kl: 0.010113473824574875
          policy_loss: -0.022881260376550958
          total_loss: 0.17692897932158194
          vf_explained_var: 0.7025955319404602
          vf_loss: 0.20029906312672374
    num_agent_steps_sampled: 2618952
    num_agent_steps_trained: 2618952
    num_steps_sampled: 2618952
    num_steps_trained: 2618952
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,262,81397.2,2618952,1.39974,10.79,-1.89,86.4783




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2628948
  custom_metrics: {}
  date: 2021-11-15_13-21-04
  done: false
  episode_len_mean: 86.61739130434782
  episode_media: {}
  episode_reward_max: 7.300000000000008
  episode_reward_mean: 1.6140869565217422
  episode_reward_min: -1.6500000000000008
  episodes_this_iter: 115
  episodes_total: 27802
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6346249342983605
          entropy_coeff: 0.01
          kl: 0.008766038871906913
          policy_loss: -0.02728924648040253
          total_loss: 0.16783025055232212
          vf_explained_var: 0.7837172746658325
          vf_loss: 0.19899934763208224
    num_agent_steps_sampled: 2628948
    num_agent_steps_trained: 2628948
    num_steps_sampled: 2628948
    num_steps_trained: 2628948
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,263,81721.1,2628948,1.61409,7.3,-1.65,86.6174




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2638944
  custom_metrics: {}
  date: 2021-11-15_13-26-31
  done: false
  episode_len_mean: 87.89565217391305
  episode_media: {}
  episode_reward_max: 8.580000000000014
  episode_reward_mean: 1.4779130434782635
  episode_reward_min: -1.5600000000000005
  episodes_this_iter: 115
  episodes_total: 27917
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.652084029535962
          entropy_coeff: 0.01
          kl: 0.009080640682260808
          policy_loss: -0.024087362026429585
          total_loss: 0.1636632930777935
          vf_explained_var: 0.7617142796516418
          vf_loss: 0.19099880666074018
    num_agent_steps_sampled: 2638944
    num_agent_steps_trained: 2638944
    num_steps_sampled: 2638944
    num_steps_trained: 2638944
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,264,82047.8,2638944,1.47791,8.58,-1.56,87.8957




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2648940
  custom_metrics: {}
  date: 2021-11-15_13-31-59
  done: false
  episode_len_mean: 85.64655172413794
  episode_media: {}
  episode_reward_max: 8.980000000000011
  episode_reward_mean: 1.2911206896551748
  episode_reward_min: -1.7200000000000009
  episodes_this_iter: 116
  episodes_total: 28033
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6437322450499248
          entropy_coeff: 0.01
          kl: 0.009118208508342844
          policy_loss: -0.02584015873985158
          total_loss: 0.16570463622363013
          vf_explained_var: 0.7229458689689636
          vf_loss: 0.19461314721494657
    num_agent_steps_sampled: 2648940
    num_agent_steps_trained: 2648940
    num_steps_sampled: 2648940
    num_steps_trained: 2648940
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,265,82376.3,2648940,1.29112,8.98,-1.72,85.6466




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2658936
  custom_metrics: {}
  date: 2021-11-15_13-37-24
  done: false
  episode_len_mean: 85.33050847457628
  episode_media: {}
  episode_reward_max: 8.83000000000001
  episode_reward_mean: 1.3920338983050866
  episode_reward_min: -1.570000000000001
  episodes_this_iter: 118
  episodes_total: 28151
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.645597881027776
          entropy_coeff: 0.01
          kl: 0.008966438670844096
          policy_loss: -0.022185656011231944
          total_loss: 0.1647712188339036
          vf_explained_var: 0.7309871912002563
          vf_loss: 0.1904328529428468
    num_agent_steps_sampled: 2658936
    num_agent_steps_trained: 2658936
    num_steps_sampled: 2658936
    num_steps_trained: 2658936
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,266,82701.3,2658936,1.39203,8.83,-1.57,85.3305


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2668932
  custom_metrics: {}
  date: 2021-11-15_13-42-37
  done: false
  episode_len_mean: 86.64347826086957
  episode_media: {}
  episode_reward_max: 8.670000000000012
  episode_reward_mean: 1.1806086956521762
  episode_reward_min: -1.810000000000001
  episodes_this_iter: 115
  episodes_total: 28266
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6478407051828174
          entropy_coeff: 0.01
          kl: 0.007209761324082638
          policy_loss: -0.027376808671471783
          total_loss: 0.14489067554191296
          vf_explained_var: 0.691409707069397
          vf_loss: 0.18026806111288313
    num_agent_steps_sampled: 2668932
    num_agent_steps_trained: 2668932
    num_steps_sampled: 2668932
    num_steps_trained: 2668932
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,267,83013.5,2668932,1.18061,8.67,-1.81,86.6435




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2678928
  custom_metrics: {}
  date: 2021-11-15_13-48-06
  done: false
  episode_len_mean: 84.5677966101695
  episode_media: {}
  episode_reward_max: 6.310000000000009
  episode_reward_mean: 1.1567796610169516
  episode_reward_min: -1.7300000000000009
  episodes_this_iter: 118
  episodes_total: 28384
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.643009383352394
          entropy_coeff: 0.01
          kl: 0.009220980028598112
          policy_loss: -0.02761271005958064
          total_loss: 0.16280735388485731
          vf_explained_var: 0.7190642356872559
          vf_loss: 0.19321779511096832
    num_agent_steps_sampled: 2678928
    num_agent_steps_trained: 2678928
    num_steps_sampled: 2678928
    num_steps_trained: 2678928
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,268,83343.2,2678928,1.15678,6.31,-1.73,84.5678




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2688924
  custom_metrics: {}
  date: 2021-11-15_13-53-35
  done: false
  episode_len_mean: 85.68376068376068
  episode_media: {}
  episode_reward_max: 11.06000000000001
  episode_reward_mean: 1.6535897435897464
  episode_reward_min: -1.5500000000000005
  episodes_this_iter: 117
  episodes_total: 28501
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.643094083794162
          entropy_coeff: 0.01
          kl: 0.00893370058504085
          policy_loss: -0.02355088953876024
          total_loss: 0.17153580566653265
          vf_explained_var: 0.7416312098503113
          vf_loss: 0.19862153940101784
    num_agent_steps_sampled: 2688924
    num_agent_steps_trained: 2688924
    num_steps_sampled: 2688924
    num_steps_trained: 2688924
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,269,83671.7,2688924,1.65359,11.06,-1.55,85.6838


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2698920
  custom_metrics: {}
  date: 2021-11-15_13-58-49
  done: false
  episode_len_mean: 86.52173913043478
  episode_media: {}
  episode_reward_max: 7.030000000000012
  episode_reward_mean: 1.1364347826086973
  episode_reward_min: -1.6700000000000008
  episodes_this_iter: 115
  episodes_total: 28616
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6438130296193636
          entropy_coeff: 0.01
          kl: 0.007571887831626839
          policy_loss: -0.02475499921502211
          total_loss: 0.14728871751377662
          vf_explained_var: 0.6598328351974487
          vf_loss: 0.17907592666407043
    num_agent_steps_sampled: 2698920
    num_agent_steps_trained: 2698920
    num_steps_sampled: 2698920
    num_steps_trained: 2698920
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,270,83986.1,2698920,1.13643,7.03,-1.67,86.5217




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2708916
  custom_metrics: {}
  date: 2021-11-15_14-04-16
  done: false
  episode_len_mean: 85.57758620689656
  episode_media: {}
  episode_reward_max: 9.04000000000001
  episode_reward_mean: 1.4262068965517263
  episode_reward_min: -1.5500000000000007
  episodes_this_iter: 116
  episodes_total: 28732
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6535858661700518
          entropy_coeff: 0.01
          kl: 0.008437856207631643
          policy_loss: -0.02524915392645913
          total_loss: 0.18889733914357537
          vf_explained_var: 0.6948834657669067
          vf_loss: 0.21905704891054423
    num_agent_steps_sampled: 2708916
    num_agent_steps_trained: 2708916
    num_steps_sampled: 2708916
    num_steps_trained: 2708916
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,271,84312.6,2708916,1.42621,9.04,-1.55,85.5776




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2718912
  custom_metrics: {}
  date: 2021-11-15_14-09-42
  done: false
  episode_len_mean: 85.54237288135593
  episode_media: {}
  episode_reward_max: 8.540000000000013
  episode_reward_mean: 1.3242372881355957
  episode_reward_min: -1.6500000000000008
  episodes_this_iter: 118
  episodes_total: 28850
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6581233399546043
          entropy_coeff: 0.01
          kl: 0.007289699131455469
          policy_loss: -0.026457348755664297
          total_loss: 0.12961676756843415
          vf_explained_var: 0.7848790287971497
          vf_loss: 0.1639726482761594
    num_agent_steps_sampled: 2718912
    num_agent_steps_trained: 2718912
    num_steps_sampled: 2718912
    num_steps_trained: 2718912
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,272,84639.1,2718912,1.32424,8.54,-1.65,85.5424


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2728908
  custom_metrics: {}
  date: 2021-11-15_14-14-56
  done: false
  episode_len_mean: 87.35652173913043
  episode_media: {}
  episode_reward_max: 8.750000000000012
  episode_reward_mean: 1.7440000000000027
  episode_reward_min: -2.040000000000001
  episodes_this_iter: 115
  episodes_total: 28965
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.633692502262246
          entropy_coeff: 0.01
          kl: 0.008115675319528682
          policy_loss: -0.023516894798948723
          total_loss: 0.19848748730957252
          vf_explained_var: 0.7509570717811584
          vf_loss: 0.22754171922779046
    num_agent_steps_sampled: 2728908
    num_agent_steps_trained: 2728908
    num_steps_sampled: 2728908
    num_steps_trained: 2728908
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,273,84952.6,2728908,1.744,8.75,-2.04,87.3565




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2738904
  custom_metrics: {}
  date: 2021-11-15_14-20-37
  done: false
  episode_len_mean: 84.50847457627118
  episode_media: {}
  episode_reward_max: 8.98000000000001
  episode_reward_mean: 1.4718644067796636
  episode_reward_min: -1.6900000000000008
  episodes_this_iter: 118
  episodes_total: 29083
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6404890484280057
          entropy_coeff: 0.01
          kl: 0.007717549115560125
          policy_loss: -0.02634730879535787
          total_loss: 0.16516920406745475
          vf_explained_var: 0.7416377067565918
          vf_loss: 0.1981421693514746
    num_agent_steps_sampled: 2738904
    num_agent_steps_trained: 2738904
    num_steps_sampled: 2738904
    num_steps_trained: 2738904
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,274,85293.8,2738904,1.47186,8.98,-1.69,84.5085




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2748900
  custom_metrics: {}
  date: 2021-11-15_14-26-04
  done: false
  episode_len_mean: 87.28947368421052
  episode_media: {}
  episode_reward_max: 7.100000000000008
  episode_reward_mean: 1.283070175438599
  episode_reward_min: -1.760000000000001
  episodes_this_iter: 114
  episodes_total: 29197
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.648998099310785
          entropy_coeff: 0.01
          kl: 0.007675435547069578
          policy_loss: -0.024495643448944275
          total_loss: 0.16356362443831232
          vf_explained_var: 0.6829280853271484
          vf_loss: 0.19487794691808202
    num_agent_steps_sampled: 2748900
    num_agent_steps_trained: 2748900
    num_steps_sampled: 2748900
    num_steps_trained: 2748900
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,275,85621.1,2748900,1.28307,7.1,-1.76,87.2895


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2758896
  custom_metrics: {}
  date: 2021-11-15_14-31-15
  done: false
  episode_len_mean: 87.58771929824562
  episode_media: {}
  episode_reward_max: 8.440000000000015
  episode_reward_mean: 1.554035087719301
  episode_reward_min: -1.9500000000000008
  episodes_this_iter: 114
  episodes_total: 29311
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6497742221905636
          entropy_coeff: 0.01
          kl: 0.009236735315462666
          policy_loss: -0.022060937829649983
          total_loss: 0.1859455789765741
          vf_explained_var: 0.7365950345993042
          vf_loss: 0.21083151726529767
    num_agent_steps_sampled: 2758896
    num_agent_steps_trained: 2758896
    num_steps_sampled: 2758896
    num_steps_trained: 2758896
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,276,85931.6,2758896,1.55404,8.44,-1.95,87.5877




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2768892
  custom_metrics: {}
  date: 2021-11-15_14-36-58
  done: false
  episode_len_mean: 85.33050847457628
  episode_media: {}
  episode_reward_max: 10.770000000000012
  episode_reward_mean: 1.9456779661016985
  episode_reward_min: -1.4700000000000009
  episodes_this_iter: 118
  episodes_total: 29429
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6398288619823944
          entropy_coeff: 0.01
          kl: 0.008381875246006027
          policy_loss: -0.020965325336855575
          total_loss: 0.21575999138478794
          vf_explained_var: 0.7334344983100891
          vf_loss: 0.2416417761443135
    num_agent_steps_sampled: 2768892
    num_agent_steps_trained: 2768892
    num_steps_sampled: 2768892
    num_steps_trained: 2768892
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,277,86274.3,2768892,1.94568,10.77,-1.47,85.3305




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2778888
  custom_metrics: {}
  date: 2021-11-15_14-42-25
  done: false
  episode_len_mean: 85.79310344827586
  episode_media: {}
  episode_reward_max: 7.220000000000011
  episode_reward_mean: 1.0504310344827603
  episode_reward_min: -1.6500000000000008
  episodes_this_iter: 116
  episodes_total: 29545
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6498177244113044
          entropy_coeff: 0.01
          kl: 0.006917673886159153
          policy_loss: -0.02148973451067622
          total_loss: 0.16244791381033216
          vf_explained_var: 0.7078914642333984
          vf_loss: 0.19270658424466403
    num_agent_steps_sampled: 2778888
    num_agent_steps_trained: 2778888
    num_steps_sampled: 2778888
    num_steps_trained: 2778888
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,278,86601.6,2778888,1.05043,7.22,-1.65,85.7931


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2788884
  custom_metrics: {}
  date: 2021-11-15_14-47-37
  done: false
  episode_len_mean: 87.26086956521739
  episode_media: {}
  episode_reward_max: 7.020000000000007
  episode_reward_mean: 1.435652173913046
  episode_reward_min: -1.6600000000000008
  episodes_this_iter: 115
  episodes_total: 29660
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.643436344974061
          entropy_coeff: 0.01
          kl: 0.007859228957653947
          policy_loss: -0.025901701068107644
          total_loss: 0.1560419932879412
          vf_explained_var: 0.7217201590538025
          vf_loss: 0.1882357137858804
    num_agent_steps_sampled: 2788884
    num_agent_steps_trained: 2788884
    num_steps_sampled: 2788884
    num_steps_trained: 2788884
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,279,86913.5,2788884,1.43565,7.02,-1.66,87.2609




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2798880
  custom_metrics: {}
  date: 2021-11-15_14-53-17
  done: false
  episode_len_mean: 86.6842105263158
  episode_media: {}
  episode_reward_max: 6.87000000000001
  episode_reward_mean: 1.451140350877196
  episode_reward_min: -1.6200000000000006
  episodes_this_iter: 114
  episodes_total: 29774
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.645667767422831
          entropy_coeff: 0.01
          kl: 0.0071636246250246
          policy_loss: -0.024658493431778544
          total_loss: 0.1756259506721543
          vf_explained_var: 0.7298607230186462
          vf_loss: 0.20838153661332195
    num_agent_steps_sampled: 2798880
    num_agent_steps_trained: 2798880
    num_steps_sampled: 2798880
    num_steps_trained: 2798880
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,280,87253.9,2798880,1.45114,6.87,-1.62,86.6842




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2808876
  custom_metrics: {}
  date: 2021-11-15_14-58-47
  done: false
  episode_len_mean: 87.2
  episode_media: {}
  episode_reward_max: 8.730000000000013
  episode_reward_mean: 1.1377391304347848
  episode_reward_min: -2.1599999999999997
  episodes_this_iter: 115
  episodes_total: 29889
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.655634123749203
          entropy_coeff: 0.01
          kl: 0.00776028272930397
          policy_loss: -0.026181193640153123
          total_loss: 0.14625270032745777
          vf_explained_var: 0.7214721441268921
          vf_loss: 0.17910147990720968
    num_agent_steps_sampled: 2808876
    num_agent_steps_trained: 2808876
    num_steps_sampled: 2808876
    num_steps_trained: 2808876
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,281,87584,2808876,1.13774,8.73,-2.16,87.2




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2818872
  custom_metrics: {}
  date: 2021-11-15_15-04-14
  done: false
  episode_len_mean: 88.0701754385965
  episode_media: {}
  episode_reward_max: 8.63000000000001
  episode_reward_mean: 1.6028947368421083
  episode_reward_min: -2.14
  episodes_this_iter: 114
  episodes_total: 30003
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6523046091071563
          entropy_coeff: 0.01
          kl: 0.00842571506655952
          policy_loss: -0.021125656276400017
          total_loss: 0.20809812110880566
          vf_explained_var: 0.7357605695724487
          vf_loss: 0.23415263838206346
    num_agent_steps_sampled: 2818872
    num_agent_steps_trained: 2818872
    num_steps_sampled: 2818872
    num_steps_trained: 2818872
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,282,87910.6,2818872,1.60289,8.63,-2.14,88.0702




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2828868
  custom_metrics: {}
  date: 2021-11-15_15-09-41
  done: false
  episode_len_mean: 87.07826086956521
  episode_media: {}
  episode_reward_max: 6.870000000000009
  episode_reward_mean: 1.3946086956521768
  episode_reward_min: -1.7500000000000007
  episodes_this_iter: 115
  episodes_total: 30118
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.646668238925119
          entropy_coeff: 0.01
          kl: 0.007541064797632011
          policy_loss: -0.02410719100939126
          total_loss: 0.1543666907478697
          vf_explained_var: 0.7655453085899353
          vf_loss: 0.18561364005789416
    num_agent_steps_sampled: 2828868
    num_agent_steps_trained: 2828868
    num_steps_sampled: 2828868
    num_steps_trained: 2828868
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,283,88237.5,2828868,1.39461,6.87,-1.75,87.0783




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2838864
  custom_metrics: {}
  date: 2021-11-15_15-15-06
  done: false
  episode_len_mean: 87.54385964912281
  episode_media: {}
  episode_reward_max: 8.950000000000014
  episode_reward_mean: 1.5263157894736867
  episode_reward_min: -1.7600000000000007
  episodes_this_iter: 114
  episodes_total: 30232
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.65285975871942
          entropy_coeff: 0.01
          kl: 0.007053676853333646
          policy_loss: -0.026556307744855682
          total_loss: 0.16008791165450253
          vf_explained_var: 0.7392191290855408
          vf_loss: 0.19509501499362672
    num_agent_steps_sampled: 2838864
    num_agent_steps_trained: 2838864
    num_steps_sampled: 2838864
    num_steps_trained: 2838864
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,284,88562.7,2838864,1.52632,8.95,-1.76,87.5439


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2848860
  custom_metrics: {}
  date: 2021-11-15_15-20-19
  done: false
  episode_len_mean: 88.0
  episode_media: {}
  episode_reward_max: 6.940000000000012
  episode_reward_mean: 1.4977192982456167
  episode_reward_min: -1.7300000000000009
  episodes_this_iter: 114
  episodes_total: 30346
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6523593219936403
          entropy_coeff: 0.01
          kl: 0.007756680421724107
          policy_loss: -0.023837638208563
          total_loss: 0.18665327505511034
          vf_explained_var: 0.7182493805885315
          vf_loss: 0.21713498306190038
    num_agent_steps_sampled: 2848860
    num_agent_steps_trained: 2848860
    num_steps_sampled: 2848860
    num_steps_trained: 2848860
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,285,88875.7,2848860,1.49772,6.94,-1.73,88




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2858856
  custom_metrics: {}
  date: 2021-11-15_15-25-43
  done: false
  episode_len_mean: 87.34210526315789
  episode_media: {}
  episode_reward_max: 9.010000000000012
  episode_reward_mean: 1.4463157894736869
  episode_reward_min: -1.7600000000000007
  episodes_this_iter: 114
  episodes_total: 30460
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.654790788022881
          entropy_coeff: 0.01
          kl: 0.006676000288664849
          policy_loss: -0.02543826985817689
          total_loss: 0.1457627041086268
          vf_explained_var: 0.7216504812240601
          vf_loss: 0.18063902244425545
    num_agent_steps_sampled: 2858856
    num_agent_steps_trained: 2858856
    num_steps_sampled: 2858856
    num_steps_trained: 2858856
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,286,89199.4,2858856,1.44632,9.01,-1.76,87.3421




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2868852
  custom_metrics: {}
  date: 2021-11-15_15-31-08
  done: false
  episode_len_mean: 88.33333333333333
  episode_media: {}
  episode_reward_max: 7.120000000000013
  episode_reward_mean: 1.4328947368421079
  episode_reward_min: -1.9100000000000008
  episodes_this_iter: 114
  episodes_total: 30574
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6468829965998983
          entropy_coeff: 0.01
          kl: 0.008086170210617255
          policy_loss: -0.026807709507898896
          total_loss: 0.1796597899065122
          vf_explained_var: 0.7438666224479675
          vf_loss: 0.21221235998404714
    num_agent_steps_sampled: 2868852
    num_agent_steps_trained: 2868852
    num_steps_sampled: 2868852
    num_steps_trained: 2868852
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,287,89524.7,2868852,1.43289,7.12,-1.91,88.3333


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2878848
  custom_metrics: {}
  date: 2021-11-15_15-36-22
  done: false
  episode_len_mean: 88.47321428571429
  episode_media: {}
  episode_reward_max: 6.170000000000013
  episode_reward_mean: 1.3807142857142882
  episode_reward_min: -1.6000000000000005
  episodes_this_iter: 112
  episodes_total: 30686
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6423077253194958
          entropy_coeff: 0.01
          kl: 0.006953012103921516
          policy_loss: -0.027424035769783788
          total_loss: 0.138258271652441
          vf_explained_var: 0.7820891737937927
          vf_loss: 0.1742855753351608
    num_agent_steps_sampled: 2878848
    num_agent_steps_trained: 2878848
    num_steps_sampled: 2878848
    num_steps_trained: 2878848
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,288,89837.7,2878848,1.38071,6.17,-1.6,88.4732




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2888844
  custom_metrics: {}
  date: 2021-11-15_15-41-46
  done: false
  episode_len_mean: 87.20869565217392
  episode_media: {}
  episode_reward_max: 8.740000000000013
  episode_reward_mean: 1.35626086956522
  episode_reward_min: -1.880000000000001
  episodes_this_iter: 115
  episodes_total: 30801
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6408994195807693
          entropy_coeff: 0.01
          kl: 0.007981928905210404
          policy_loss: -0.027000903058001118
          total_loss: 0.140952922252572
          vf_explained_var: 0.7743754386901855
          vf_loss: 0.17390600918139468
    num_agent_steps_sampled: 2888844
    num_agent_steps_trained: 2888844
    num_steps_sampled: 2888844
    num_steps_trained: 2888844
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,289,90162.1,2888844,1.35626,8.74,-1.88,87.2087




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2898840
  custom_metrics: {}
  date: 2021-11-15_15-47-15
  done: false
  episode_len_mean: 86.14782608695653
  episode_media: {}
  episode_reward_max: 7.110000000000011
  episode_reward_mean: 1.4619130434782635
  episode_reward_min: -1.6600000000000006
  episodes_this_iter: 115
  episodes_total: 30916
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6464540785194464
          entropy_coeff: 0.01
          kl: 0.0074345126179868335
          policy_loss: -0.025178764407865258
          total_loss: 0.16816043231087044
          vf_explained_var: 0.7629613876342773
          vf_loss: 0.20074989437364424
    num_agent_steps_sampled: 2898840
    num_agent_steps_trained: 2898840
    num_steps_sampled: 2898840
    num_steps_trained: 2898840


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,290,90491.1,2898840,1.46191,7.11,-1.66,86.1478




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2908836
  custom_metrics: {}
  date: 2021-11-15_15-52-40
  done: false
  episode_len_mean: 87.54385964912281
  episode_media: {}
  episode_reward_max: 11.27000000000001
  episode_reward_mean: 1.478070175438599
  episode_reward_min: -1.7900000000000005
  episodes_this_iter: 114
  episodes_total: 31030
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6347017905650993
          entropy_coeff: 0.01
          kl: 0.0074218083191151495
          policy_loss: -0.025112134432340533
          total_loss: 0.16571967169546928
          vf_explained_var: 0.7644220590591431
          vf_loss: 0.198157540965093
    num_agent_steps_sampled: 2908836
    num_agent_steps_trained: 2908836
    num_steps_sampled: 2908836
    num_steps_trained: 2908836
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,291,90816.3,2908836,1.47807,11.27,-1.79,87.5439




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2918832
  custom_metrics: {}
  date: 2021-11-15_15-58-05
  done: false
  episode_len_mean: 87.00869565217391
  episode_media: {}
  episode_reward_max: 10.680000000000012
  episode_reward_mean: 1.5225217391304375
  episode_reward_min: -1.7300000000000009
  episodes_this_iter: 115
  episodes_total: 31145
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6388682750555184
          entropy_coeff: 0.01
          kl: 0.008840300490093426
          policy_loss: -0.021394311408233693
          total_loss: 0.2008171129315041
          vf_explained_var: 0.6972230672836304
          vf_loss: 0.2259433837265222
    num_agent_steps_sampled: 2918832
    num_agent_steps_trained: 2918832
    num_steps_sampled: 2918832
    num_steps_trained: 2918832
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,292,91141.1,2918832,1.52252,10.68,-1.73,87.0087


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2928828
  custom_metrics: {}
  date: 2021-11-15_16-03-22
  done: false
  episode_len_mean: 85.13559322033899
  episode_media: {}
  episode_reward_max: 7.010000000000012
  episode_reward_mean: 1.408305084745765
  episode_reward_min: -1.6500000000000008
  episodes_this_iter: 118
  episodes_total: 31263
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.640412743274982
          entropy_coeff: 0.01
          kl: 0.006827094695670493
          policy_loss: -0.024582035464640613
          total_loss: 0.18323576962097715
          vf_explained_var: 0.7163925170898438
          vf_loss: 0.21672483581341168
    num_agent_steps_sampled: 2928828
    num_agent_steps_trained: 2928828
    num_steps_sampled: 2928828
    num_steps_trained: 2928828
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,293,91457.8,2928828,1.40831,7.01,-1.65,85.1356




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2938824
  custom_metrics: {}
  date: 2021-11-15_16-09-08
  done: false
  episode_len_mean: 86.0
  episode_media: {}
  episode_reward_max: 8.880000000000011
  episode_reward_mean: 1.3591379310344849
  episode_reward_min: -1.7700000000000007
  episodes_this_iter: 116
  episodes_total: 31379
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.634008427257212
          entropy_coeff: 0.01
          kl: 0.007869304079555557
          policy_loss: -0.02420018339712714
          total_loss: 0.18012585181336946
          vf_explained_var: 0.726006805896759
          vf_loss: 0.21049795388443093
    num_agent_steps_sampled: 2938824
    num_agent_steps_trained: 2938824
    num_steps_sampled: 2938824
    num_steps_trained: 2938824
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,294,91804.4,2938824,1.35914,8.88,-1.77,86




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2948820
  custom_metrics: {}
  date: 2021-11-15_16-14-43
  done: false
  episode_len_mean: 86.34482758620689
  episode_media: {}
  episode_reward_max: 7.080000000000014
  episode_reward_mean: 1.6817241379310373
  episode_reward_min: -1.820000000000001
  episodes_this_iter: 116
  episodes_total: 31495
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.640677185751434
          entropy_coeff: 0.01
          kl: 0.0065630192162371955
          policy_loss: -0.023040344760331333
          total_loss: 0.17393529638011232
          vf_explained_var: 0.7327281832695007
          vf_loss: 0.20656211246203027
    num_agent_steps_sampled: 2948820
    num_agent_steps_trained: 2948820
    num_steps_sampled: 2948820
    num_steps_trained: 2948820
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,295,92139.3,2948820,1.68172,7.08,-1.82,86.3448


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2958816
  custom_metrics: {}
  date: 2021-11-15_16-20-06
  done: false
  episode_len_mean: 86.86086956521739
  episode_media: {}
  episode_reward_max: 8.730000000000011
  episode_reward_mean: 0.9758260869565235
  episode_reward_min: -1.7100000000000006
  episodes_this_iter: 115
  episodes_total: 31610
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.636964913107391
          entropy_coeff: 0.01
          kl: 0.006556558885997486
          policy_loss: -0.024948935095682485
          total_loss: 0.15696529176535928
          vf_explained_var: 0.6780102849006653
          vf_loss: 0.19148013264728853
    num_agent_steps_sampled: 2958816
    num_agent_steps_trained: 2958816
    num_steps_sampled: 2958816
    num_steps_trained: 2958816
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,296,92461.5,2958816,0.975826,8.73,-1.71,86.8609




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2968812
  custom_metrics: {}
  date: 2021-11-15_16-25-46
  done: false
  episode_len_mean: 86.04273504273505
  episode_media: {}
  episode_reward_max: 6.880000000000008
  episode_reward_mean: 1.1480341880341904
  episode_reward_min: -1.8800000000000008
  episodes_this_iter: 117
  episodes_total: 31727
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.644962383233584
          entropy_coeff: 0.01
          kl: 0.007961031032814177
          policy_loss: -0.022487113951968077
          total_loss: 0.1724280895259327
          vf_explained_var: 0.6805382370948792
          vf_loss: 0.20096157648019555
    num_agent_steps_sampled: 2968812
    num_agent_steps_trained: 2968812
    num_steps_sampled: 2968812
    num_steps_trained: 2968812
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,297,92801.9,2968812,1.14803,6.88,-1.88,86.0427




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2978808
  custom_metrics: {}
  date: 2021-11-15_16-31-19
  done: false
  episode_len_mean: 87.44736842105263
  episode_media: {}
  episode_reward_max: 7.240000000000006
  episode_reward_mean: 1.3224561403508797
  episode_reward_min: -1.940000000000001
  episodes_this_iter: 114
  episodes_total: 31841
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.641324911871527
          entropy_coeff: 0.01
          kl: 0.0069298912651530415
          policy_loss: -0.024680102509884245
          total_loss: 0.16195575748411062
          vf_explained_var: 0.7301974892616272
          vf_loss: 0.19528855585023505
    num_agent_steps_sampled: 2978808
    num_agent_steps_trained: 2978808
    num_steps_sampled: 2978808
    num_steps_trained: 2978808
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,298,93134.8,2978808,1.32246,7.24,-1.94,87.4474




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2988804
  custom_metrics: {}
  date: 2021-11-15_16-36-46
  done: false
  episode_len_mean: 86.65217391304348
  episode_media: {}
  episode_reward_max: 7.210000000000008
  episode_reward_mean: 1.750695652173916
  episode_reward_min: -1.840000000000001
  episodes_this_iter: 115
  episodes_total: 31956
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6318466040823196
          entropy_coeff: 0.01
          kl: 0.006186887919006856
          policy_loss: -0.021832756924196187
          total_loss: 0.18076333335927156
          vf_explained_var: 0.7421691417694092
          vf_loss: 0.2130582393059491
    num_agent_steps_sampled: 2988804
    num_agent_steps_trained: 2988804
    num_steps_sampled: 2988804
    num_steps_trained: 2988804
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,299,93461.7,2988804,1.7507,7.21,-1.84,86.6522


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 2998800
  custom_metrics: {}
  date: 2021-11-15_16-42-01
  done: false
  episode_len_mean: 86.80172413793103
  episode_media: {}
  episode_reward_max: 8.640000000000008
  episode_reward_mean: 1.684655172413796
  episode_reward_min: -1.720000000000001
  episodes_this_iter: 116
  episodes_total: 32072
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6306792794129787
          entropy_coeff: 0.01
          kl: 0.007129886990517685
          policy_loss: -0.023112370002951122
          total_loss: 0.2188606854343118
          vf_explained_var: 0.7224344611167908
          vf_loss: 0.25000672838212845
    num_agent_steps_sampled: 2998800
    num_agent_steps_trained: 2998800
    num_steps_sampled: 2998800
    num_steps_trained: 2998800
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,300,93776.9,2998800,1.68466,8.64,-1.72,86.8017




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3008796
  custom_metrics: {}
  date: 2021-11-15_16-47-45
  done: false
  episode_len_mean: 86.13913043478261
  episode_media: {}
  episode_reward_max: 7.140000000000011
  episode_reward_mean: 1.467478260869568
  episode_reward_min: -2.3000000000000003
  episodes_this_iter: 115
  episodes_total: 32187
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6328452088893988
          entropy_coeff: 0.01
          kl: 0.006320053456304075
          policy_loss: -0.02129911013886842
          total_loss: 0.1700288786377726
          vf_explained_var: 0.7579931020736694
          vf_loss: 0.20145883539285606
    num_agent_steps_sampled: 3008796
    num_agent_steps_trained: 3008796
    num_steps_sampled: 3008796
    num_steps_trained: 3008796
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,301,94121,3008796,1.46748,7.14,-2.3,86.1391




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3018792
  custom_metrics: {}
  date: 2021-11-15_16-53-15
  done: false
  episode_len_mean: 87.38260869565218
  episode_media: {}
  episode_reward_max: 10.350000000000017
  episode_reward_mean: 1.251304347826089
  episode_reward_min: -1.760000000000001
  episodes_this_iter: 115
  episodes_total: 32302
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6382649627506223
          entropy_coeff: 0.01
          kl: 0.006063233875182315
          policy_loss: -0.024256679170534142
          total_loss: 0.1579634363966015
          vf_explained_var: 0.7372511625289917
          vf_loss: 0.19306336021783133
    num_agent_steps_sampled: 3018792
    num_agent_steps_trained: 3018792
    num_steps_sampled: 3018792
    num_steps_trained: 3018792
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,302,94450.7,3018792,1.2513,10.35,-1.76,87.3826


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3028788
  custom_metrics: {}
  date: 2021-11-15_16-58-36
  done: false
  episode_len_mean: 87.85964912280701
  episode_media: {}
  episode_reward_max: 14.950000000000014
  episode_reward_mean: 1.478333333333336
  episode_reward_min: -2.2100000000000004
  episodes_this_iter: 114
  episodes_total: 32416
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.636877772033724
          entropy_coeff: 0.01
          kl: 0.007191338335693134
          policy_loss: -0.02416983887147254
          total_loss: 0.183209233575214
          vf_explained_var: 0.7068713903427124
          vf_loss: 0.21531723643868014
    num_agent_steps_sampled: 3028788
    num_agent_steps_trained: 3028788
    num_steps_sampled: 3028788
    num_steps_trained: 3028788
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,303,94771.3,3028788,1.47833,14.95,-2.21,87.8596




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3038784
  custom_metrics: {}
  date: 2021-11-15_17-04-19
  done: false
  episode_len_mean: 87.64601769911505
  episode_media: {}
  episode_reward_max: 12.690000000000015
  episode_reward_mean: 1.3160176991150467
  episode_reward_min: -1.8000000000000007
  episodes_this_iter: 113
  episodes_total: 32529
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6297602735014043
          entropy_coeff: 0.01
          kl: 0.007034053847091862
          policy_loss: -0.0226226187358873
          total_loss: 0.18313409512520282
          vf_explained_var: 0.6777095794677734
          vf_loss: 0.21402680639289
    num_agent_steps_sampled: 3038784
    num_agent_steps_trained: 3038784
    num_steps_sampled: 3038784
    num_steps_trained: 3038784
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,304,95114.9,3038784,1.31602,12.69,-1.8,87.646




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3048780
  custom_metrics: {}
  date: 2021-11-15_17-09-52
  done: false
  episode_len_mean: 87.61739130434782
  episode_media: {}
  episode_reward_max: 8.900000000000011
  episode_reward_mean: 1.7033913043478295
  episode_reward_min: -1.8500000000000012
  episodes_this_iter: 115
  episodes_total: 32644
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.62935480072967
          entropy_coeff: 0.01
          kl: 0.00690073295901203
          policy_loss: -0.020592159174510048
          total_loss: 0.19374156031880974
          vf_explained_var: 0.7308907508850098
          vf_loss: 0.22294144393422474
    num_agent_steps_sampled: 3048780
    num_agent_steps_trained: 3048780
    num_steps_sampled: 3048780
    num_steps_trained: 3048780
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,305,95447.6,3048780,1.70339,8.9,-1.85,87.6174


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3058776
  custom_metrics: {}
  date: 2021-11-15_17-15-14
  done: false
  episode_len_mean: 88.70535714285714
  episode_media: {}
  episode_reward_max: 10.710000000000012
  episode_reward_mean: 1.4899107142857166
  episode_reward_min: -1.860000000000001
  episodes_this_iter: 112
  episodes_total: 32756
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.632505291343754
          entropy_coeff: 0.01
          kl: 0.007406455243958613
          policy_loss: -0.01939932514483539
          total_loss: 0.18852940380541433
          vf_explained_var: 0.6917382478713989
          vf_loss: 0.2152718473954174
    num_agent_steps_sampled: 3058776
    num_agent_steps_trained: 3058776
    num_steps_sampled: 3058776
    num_steps_trained: 3058776
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,306,95769.2,3058776,1.48991,10.71,-1.86,88.7054




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3068772
  custom_metrics: {}
  date: 2021-11-15_17-20-56
  done: false
  episode_len_mean: 87.4695652173913
  episode_media: {}
  episode_reward_max: 8.870000000000013
  episode_reward_mean: 1.372521739130437
  episode_reward_min: -1.7500000000000009
  episodes_this_iter: 115
  episodes_total: 32871
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.631755698644198
          entropy_coeff: 0.01
          kl: 0.0076226899702534125
          policy_loss: -0.025529614417280397
          total_loss: 0.1721180203784671
          vf_explained_var: 0.6931282877922058
          vf_loss: 0.2044290723088078
    num_agent_steps_sampled: 3068772
    num_agent_steps_trained: 3068772
    num_steps_sampled: 3068772
    num_steps_trained: 3068772
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,307,96111.6,3068772,1.37252,8.87,-1.75,87.4696


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3078768
  custom_metrics: {}
  date: 2021-11-15_17-26-10
  done: false
  episode_len_mean: 87.92035398230088
  episode_media: {}
  episode_reward_max: 11.220000000000006
  episode_reward_mean: 1.580707964601773
  episode_reward_min: -2.149999999999999
  episodes_this_iter: 113
  episodes_total: 32984
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6228952583084757
          entropy_coeff: 0.01
          kl: 0.008327088054782146
          policy_loss: -0.018481114307720946
          total_loss: 0.21842568449031274
          vf_explained_var: 0.6822929978370667
          vf_loss: 0.24179433587238064
    num_agent_steps_sampled: 3078768
    num_agent_steps_trained: 3078768
    num_steps_sampled: 3078768
    num_steps_trained: 3078768
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,308,96426,3078768,1.58071,11.22,-2.15,87.9204




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3088764
  custom_metrics: {}
  date: 2021-11-15_17-31-36
  done: false
  episode_len_mean: 87.15652173913044
  episode_media: {}
  episode_reward_max: 6.640000000000013
  episode_reward_mean: 1.5274782608695685
  episode_reward_min: -2.089999999999999
  episodes_this_iter: 115
  episodes_total: 33099
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.630467592039679
          entropy_coeff: 0.01
          kl: 0.0067474622097759675
          policy_loss: -0.022002451448168002
          total_loss: 0.17720547838725595
          vf_explained_var: 0.6748864650726318
          vf_loss: 0.20821959802196321
    num_agent_steps_sampled: 3088764
    num_agent_steps_trained: 3088764
    num_steps_sampled: 3088764
    num_steps_trained: 3088764
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,309,96751.8,3088764,1.52748,6.64,-2.09,87.1565




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3098760
  custom_metrics: {}
  date: 2021-11-15_17-37-14
  done: false
  episode_len_mean: 87.87719298245614
  episode_media: {}
  episode_reward_max: 8.940000000000014
  episode_reward_mean: 1.1973684210526336
  episode_reward_min: -1.8300000000000007
  episodes_this_iter: 114
  episodes_total: 33213
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.640706805082468
          entropy_coeff: 0.01
          kl: 0.006716355285847178
          policy_loss: -0.021190916856305083
          total_loss: 0.1679837241012635
          vf_explained_var: 0.661496102809906
          vf_loss: 0.19836842520321663
    num_agent_steps_sampled: 3098760
    num_agent_steps_trained: 3098760
    num_steps_sampled: 3098760
    num_steps_trained: 3098760
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,310,97089.8,3098760,1.19737,8.94,-1.83,87.8772


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3108756
  custom_metrics: {}
  date: 2021-11-15_17-42-29
  done: false
  episode_len_mean: 88.99107142857143
  episode_media: {}
  episode_reward_max: 8.69000000000001
  episode_reward_mean: 1.5929464285714314
  episode_reward_min: -2.0200000000000005
  episodes_this_iter: 112
  episodes_total: 33325
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.63660413424174
          entropy_coeff: 0.01
          kl: 0.007640272621378186
          policy_loss: -0.02157511968070116
          total_loss: 0.17803599323886327
          vf_explained_var: 0.7321943044662476
          vf_loss: 0.20639597202340762
    num_agent_steps_sampled: 3108756
    num_agent_steps_trained: 3108756
    num_steps_sampled: 3108756
    num_steps_trained: 3108756
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,311,97404.4,3108756,1.59295,8.69,-2.02,88.9911




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3118752
  custom_metrics: {}
  date: 2021-11-15_17-47-55
  done: false
  episode_len_mean: 87.78260869565217
  episode_media: {}
  episode_reward_max: 7.120000000000008
  episode_reward_mean: 1.2889565217391323
  episode_reward_min: -2.000000000000001
  episodes_this_iter: 115
  episodes_total: 33440
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6374089564013685
          entropy_coeff: 0.01
          kl: 0.006505134573942877
          policy_loss: -0.0238355853037638
          total_loss: 0.1878474609934303
          vf_explained_var: 0.6593149900436401
          vf_loss: 0.22138518831636916
    num_agent_steps_sampled: 3118752
    num_agent_steps_trained: 3118752
    num_steps_sampled: 3118752
    num_steps_trained: 3118752
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,312,97730.8,3118752,1.28896,7.12,-2,87.7826




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3128748
  custom_metrics: {}
  date: 2021-11-15_17-53-23
  done: false
  episode_len_mean: 87.65486725663717
  episode_media: {}
  episode_reward_max: 7.10000000000001
  episode_reward_mean: 1.2330973451327456
  episode_reward_min: -2.1999999999999997
  episodes_this_iter: 113
  episodes_total: 33553
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.650347957651839
          entropy_coeff: 0.01
          kl: 0.006409397958732688
          policy_loss: -0.01929246527899025
          total_loss: 0.18087586713954806
          vf_explained_var: 0.7195335626602173
          vf_loss: 0.21024522597734363
    num_agent_steps_sampled: 3128748
    num_agent_steps_trained: 3128748
    num_steps_sampled: 3128748
    num_steps_trained: 3128748
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,313,98058.4,3128748,1.2331,7.1,-2.2,87.6549




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3138744
  custom_metrics: {}
  date: 2021-11-15_17-58-49
  done: false
  episode_len_mean: 88.13274336283186
  episode_media: {}
  episode_reward_max: 6.660000000000005
  episode_reward_mean: 1.4382300884955777
  episode_reward_min: -2.1300000000000003
  episodes_this_iter: 113
  episodes_total: 33666
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.645185822401291
          entropy_coeff: 0.01
          kl: 0.007489873169090222
          policy_loss: -0.020508208261946072
          total_loss: 0.18351972552183538
          vf_explained_var: 0.7409586906433105
          vf_loss: 0.21128406626267884
    num_agent_steps_sampled: 3138744
    num_agent_steps_trained: 3138744
    num_steps_sampled: 3138744
    num_steps_trained: 3138744
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,314,98384.1,3138744,1.43823,6.66,-2.13,88.1327




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3148740
  custom_metrics: {}
  date: 2021-11-15_18-04-16
  done: false
  episode_len_mean: 87.6
  episode_media: {}
  episode_reward_max: 9.17
  episode_reward_mean: 1.33921739130435
  episode_reward_min: -1.6500000000000006
  episodes_this_iter: 115
  episodes_total: 33781
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6484634945535253
          entropy_coeff: 0.01
          kl: 0.007958798063409926
          policy_loss: -0.01933224654923647
          total_loss: 0.18683275603052452
          vf_explained_var: 0.7321537733078003
          vf_loss: 0.21225211006055913
    num_agent_steps_sampled: 3148740
    num_agent_steps_trained: 3148740
    num_steps_sampled: 3148740
    num_steps_trained: 3148740
  iterations_since_restore: 31

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,315,98710.9,3148740,1.33922,9.17,-1.65,87.6




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3158736
  custom_metrics: {}
  date: 2021-11-15_18-09-45
  done: false
  episode_len_mean: 87.03508771929825
  episode_media: {}
  episode_reward_max: 7.010000000000008
  episode_reward_mean: 1.065877192982458
  episode_reward_min: -1.850000000000001
  episodes_this_iter: 114
  episodes_total: 33895
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.646444957378583
          entropy_coeff: 0.01
          kl: 0.006108540304997093
          policy_loss: -0.01963062054103511
          total_loss: 0.17895846297232132
          vf_explained_var: 0.6999107003211975
          vf_loss: 0.2093980132602155
    num_agent_steps_sampled: 3158736
    num_agent_steps_trained: 3158736
    num_steps_sampled: 3158736
    num_steps_trained: 3158736
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,316,99039.8,3158736,1.06588,7.01,-1.85,87.0351




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3168732
  custom_metrics: {}
  date: 2021-11-15_18-15-09
  done: false
  episode_len_mean: 88.33333333333333
  episode_media: {}
  episode_reward_max: 9.010000000000007
  episode_reward_mean: 1.5626315789473713
  episode_reward_min: -1.8000000000000007
  episodes_this_iter: 114
  episodes_total: 34009
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6449522303719806
          entropy_coeff: 0.01
          kl: 0.00820863992944504
          policy_loss: -0.017384516942895886
          total_loss: 0.20951372945848376
          vf_explained_var: 0.6964545249938965
          vf_loss: 0.2323099222870018
    num_agent_steps_sampled: 3168732
    num_agent_steps_trained: 3168732
    num_steps_sampled: 3168732
    num_steps_trained: 3168732
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,317,99364.3,3168732,1.56263,9.01,-1.8,88.3333




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3178728
  custom_metrics: {}
  date: 2021-11-15_18-20-33
  done: false
  episode_len_mean: 88.04424778761062
  episode_media: {}
  episode_reward_max: 7.080000000000009
  episode_reward_mean: 1.101592920353984
  episode_reward_min: -1.950000000000001
  episodes_this_iter: 113
  episodes_total: 34122
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.649913974195464
          entropy_coeff: 0.01
          kl: 0.0072656063491340584
          policy_loss: -0.016378117814405353
          total_loss: 0.1817804524436211
          vf_explained_var: 0.6414932608604431
          vf_loss: 0.2060367551154624
    num_agent_steps_sampled: 3178728
    num_agent_steps_trained: 3178728
    num_steps_sampled: 3178728
    num_steps_trained: 3178728
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,318,99688.6,3178728,1.10159,7.08,-1.95,88.0442




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3188724
  custom_metrics: {}
  date: 2021-11-15_18-25-59
  done: false
  episode_len_mean: 88.19469026548673
  episode_media: {}
  episode_reward_max: 6.710000000000015
  episode_reward_mean: 1.480442477876109
  episode_reward_min: -1.8500000000000008
  episodes_this_iter: 113
  episodes_total: 34235
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.648565818509485
          entropy_coeff: 0.01
          kl: 0.006753509424836154
          policy_loss: -0.016464191063060466
          total_loss: 0.19118319213127669
          vf_explained_var: 0.7128424644470215
          vf_loss: 0.21682453549299865
    num_agent_steps_sampled: 3188724
    num_agent_steps_trained: 3188724
    num_steps_sampled: 3188724
    num_steps_trained: 3188724
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,319,100014,3188724,1.48044,6.71,-1.85,88.1947




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3198720
  custom_metrics: {}
  date: 2021-11-15_18-31-45
  done: false
  episode_len_mean: 89.63963963963964
  episode_media: {}
  episode_reward_max: 8.580000000000009
  episode_reward_mean: 1.6767567567567596
  episode_reward_min: -1.6800000000000008
  episodes_this_iter: 111
  episodes_total: 34346
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.647530179349785
          entropy_coeff: 0.01
          kl: 0.006678835233178604
          policy_loss: -0.018535937525284214
          total_loss: 0.1801770318992054
          vf_explained_var: 0.7419973015785217
          vf_loss: 0.20807114669607363
    num_agent_steps_sampled: 3198720
    num_agent_steps_trained: 3198720
    num_steps_sampled: 3198720
    num_steps_trained: 3198720
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,320,100360,3198720,1.67676,8.58,-1.68,89.6396




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3208716
  custom_metrics: {}
  date: 2021-11-15_18-37-07
  done: false
  episode_len_mean: 90.29729729729729
  episode_media: {}
  episode_reward_max: 6.960000000000014
  episode_reward_mean: 1.316306306306309
  episode_reward_min: -1.970000000000001
  episodes_this_iter: 111
  episodes_total: 34457
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6440976814327075
          entropy_coeff: 0.01
          kl: 0.006312677629358491
          policy_loss: -0.019996570912786782
          total_loss: 0.151094340490432
          vf_explained_var: 0.7425070405006409
          vf_loss: 0.18135318557182567
    num_agent_steps_sampled: 3208716
    num_agent_steps_trained: 3208716
    num_steps_sampled: 3208716
    num_steps_trained: 3208716
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,321,100682,3208716,1.31631,6.96,-1.97,90.2973




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3218712
  custom_metrics: {}
  date: 2021-11-15_18-42-32
  done: false
  episode_len_mean: 88.98214285714286
  episode_media: {}
  episode_reward_max: 6.580000000000011
  episode_reward_mean: 1.2319642857142878
  episode_reward_min: -1.9800000000000009
  episodes_this_iter: 112
  episodes_total: 34569
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6453843257366083
          entropy_coeff: 0.01
          kl: 0.005976540643417426
          policy_loss: -0.017910961110670216
          total_loss: 0.12259675189972115
          vf_explained_var: 0.788560152053833
          vf_loss: 0.15164433647241665
    num_agent_steps_sampled: 3218712
    num_agent_steps_trained: 3218712
    num_steps_sampled: 3218712
    num_steps_trained: 3218712
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,322,101008,3218712,1.23196,6.58,-1.98,88.9821




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3228708
  custom_metrics: {}
  date: 2021-11-15_18-47-56
  done: false
  episode_len_mean: 89.07964601769912
  episode_media: {}
  episode_reward_max: 9.350000000000009
  episode_reward_mean: 1.2239823008849577
  episode_reward_min: -2.2799999999999976
  episodes_this_iter: 113
  episodes_total: 34682
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.645501290427314
          entropy_coeff: 0.01
          kl: 0.006273125309182201
          policy_loss: -0.02043188033490163
          total_loss: 0.1596437335300904
          vf_explained_var: 0.7418593764305115
          vf_loss: 0.1904532930125188
    num_agent_steps_sampled: 3228708
    num_agent_steps_trained: 3228708
    num_steps_sampled: 3228708
    num_steps_trained: 3228708
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,323,101331,3228708,1.22398,9.35,-2.28,89.0796




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3238704
  custom_metrics: {}
  date: 2021-11-15_18-53-19
  done: false
  episode_len_mean: 90.32432432432432
  episode_media: {}
  episode_reward_max: 6.610000000000005
  episode_reward_mean: 1.3630630630630651
  episode_reward_min: -1.9900000000000009
  episodes_this_iter: 111
  episodes_total: 34793
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6487839337088106
          entropy_coeff: 0.01
          kl: 0.006351520931809331
          policy_loss: -0.016128978823335506
          total_loss: 0.17073756572511842
          vf_explained_var: 0.7274264097213745
          vf_loss: 0.19707613093619292
    num_agent_steps_sampled: 3238704
    num_agent_steps_trained: 3238704
    num_steps_sampled: 3238704
    num_steps_trained: 3238704
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,324,101654,3238704,1.36306,6.61,-1.99,90.3243




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3248700
  custom_metrics: {}
  date: 2021-11-15_18-58-42
  done: false
  episode_len_mean: 89.94594594594595
  episode_media: {}
  episode_reward_max: 8.740000000000014
  episode_reward_mean: 1.629549549549553
  episode_reward_min: -2.569999999999998
  episodes_this_iter: 111
  episodes_total: 34904
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6429239236391506
          entropy_coeff: 0.01
          kl: 0.00669555658217569
          policy_loss: -0.018022162437789205
          total_loss: 0.19700789575536665
          vf_explained_var: 0.7240305542945862
          vf_loss: 0.22429931873702405
    num_agent_steps_sampled: 3248700
    num_agent_steps_trained: 3248700
    num_steps_sampled: 3248700
    num_steps_trained: 3248700
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,325,101977,3248700,1.62955,8.74,-2.57,89.9459




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3258696
  custom_metrics: {}
  date: 2021-11-15_19-04-07
  done: false
  episode_len_mean: 89.8018018018018
  episode_media: {}
  episode_reward_max: 6.8400000000000105
  episode_reward_mean: 1.3940540540540565
  episode_reward_min: -1.780000000000001
  episodes_this_iter: 111
  episodes_total: 35015
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6394898204721957
          entropy_coeff: 0.01
          kl: 0.006782380673826925
          policy_loss: -0.017992510043211982
          total_loss: 0.18546617986578653
          vf_explained_var: 0.7377334237098694
          vf_loss: 0.21247108806975376
    num_agent_steps_sampled: 3258696
    num_agent_steps_trained: 3258696
    num_steps_sampled: 3258696
    num_steps_trained: 3258696
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,326,102302,3258696,1.39405,6.84,-1.78,89.8018




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3268692
  custom_metrics: {}
  date: 2021-11-15_19-09-32
  done: false
  episode_len_mean: 89.63392857142857
  episode_media: {}
  episode_reward_max: 8.640000000000013
  episode_reward_mean: 1.6320535714285747
  episode_reward_min: -1.8000000000000007
  episodes_this_iter: 112
  episodes_total: 35127
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.639966289202372
          entropy_coeff: 0.01
          kl: 0.006118114430202502
          policy_loss: -0.018349105158709307
          total_loss: 0.1418723732742489
          vf_explained_var: 0.7860665321350098
          vf_loss: 0.17094108301541236
    num_agent_steps_sampled: 3268692
    num_agent_steps_trained: 3268692
    num_steps_sampled: 3268692
    num_steps_trained: 3268692
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,327,102627,3268692,1.63205,8.64,-1.8,89.6339




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3278688
  custom_metrics: {}
  date: 2021-11-15_19-14-57
  done: false
  episode_len_mean: 89.08035714285714
  episode_media: {}
  episode_reward_max: 8.960000000000015
  episode_reward_mean: 1.5278571428571455
  episode_reward_min: -1.9100000000000008
  episodes_this_iter: 112
  episodes_total: 35239
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6447768616880105
          entropy_coeff: 0.01
          kl: 0.006353590668192172
          policy_loss: -0.0169320164140887
          total_loss: 0.16891240685318526
          vf_explained_var: 0.7207942008972168
          vf_loss: 0.1960086337951386
    num_agent_steps_sampled: 3278688
    num_agent_steps_trained: 3278688
    num_steps_sampled: 3278688
    num_steps_trained: 3278688
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,328,102952,3278688,1.52786,8.96,-1.91,89.0804


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3288684
  custom_metrics: {}
  date: 2021-11-15_19-20-07
  done: false
  episode_len_mean: 89.79464285714286
  episode_media: {}
  episode_reward_max: 7.110000000000003
  episode_reward_mean: 1.0688392857142879
  episode_reward_min: -2.039999999999999
  episodes_this_iter: 112
  episodes_total: 35351
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6475559297789877
          entropy_coeff: 0.01
          kl: 0.006181603830322382
          policy_loss: -0.016850113423748148
          total_loss: 0.1593586779334861
          vf_explained_var: 0.685256838798523
          vf_loss: 0.1868415772043264
    num_agent_steps_sampled: 3288684
    num_agent_steps_trained: 3288684
    num_steps_sampled: 3288684
    num_steps_trained: 3288684
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,329,103262,3288684,1.06884,7.11,-2.04,89.7946




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3298680
  custom_metrics: {}
  date: 2021-11-15_19-25-47
  done: false
  episode_len_mean: 88.42477876106194
  episode_media: {}
  episode_reward_max: 6.70000000000001
  episode_reward_mean: 1.2122123893805334
  episode_reward_min: -1.850000000000001
  episodes_this_iter: 113
  episodes_total: 35464
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.650546080740089
          entropy_coeff: 0.01
          kl: 0.006077226623361979
          policy_loss: -0.01735160544068895
          total_loss: 0.14837007937817556
          vf_explained_var: 0.7161027193069458
          vf_loss: 0.17665187824262768
    num_agent_steps_sampled: 3298680
    num_agent_steps_trained: 3298680
    num_steps_sampled: 3298680
    num_steps_trained: 3298680
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,330,103602,3298680,1.21221,6.7,-1.85,88.4248




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3308676
  custom_metrics: {}
  date: 2021-11-15_19-31-10
  done: false
  episode_len_mean: 89.375
  episode_media: {}
  episode_reward_max: 8.860000000000008
  episode_reward_mean: 1.7215178571428602
  episode_reward_min: -1.9800000000000009
  episodes_this_iter: 112
  episodes_total: 35576
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6478334107969563
          entropy_coeff: 0.01
          kl: 0.006308201846465656
          policy_loss: -0.017073790296816674
          total_loss: 0.17112467443196375
          vf_explained_var: 0.6973744630813599
          vf_loss: 0.19850956693522503
    num_agent_steps_sampled: 3308676
    num_agent_steps_trained: 3308676
    num_steps_sampled: 3308676
    num_steps_trained: 3308676
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,331,103925,3308676,1.72152,8.86,-1.98,89.375


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3318672
  custom_metrics: {}
  date: 2021-11-15_19-36-18
  done: false
  episode_len_mean: 90.43636363636364
  episode_media: {}
  episode_reward_max: 6.490000000000009
  episode_reward_mean: 1.2916363636363664
  episode_reward_min: -1.9400000000000008
  episodes_this_iter: 110
  episodes_total: 35686
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6494303674779385
          entropy_coeff: 0.01
          kl: 0.0065695369720589316
          policy_loss: -0.016799657251168457
          total_loss: 0.19662359758025497
          vf_explained_var: 0.7127186059951782
          vf_loss: 0.22308055345191916
    num_agent_steps_sampled: 3318672
    num_agent_steps_trained: 3318672
    num_steps_sampled: 3318672
    num_steps_trained: 3318672


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,332,104233,3318672,1.29164,6.49,-1.94,90.4364




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3328668
  custom_metrics: {}
  date: 2021-11-15_19-41-42
  done: false
  episode_len_mean: 89.17857142857143
  episode_media: {}
  episode_reward_max: 6.710000000000015
  episode_reward_mean: 1.0715178571428594
  episode_reward_min: -2.080000000000001
  episodes_this_iter: 112
  episodes_total: 35798
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6542856387602978
          entropy_coeff: 0.01
          kl: 0.006708941318245684
          policy_loss: -0.017173312060359835
          total_loss: 0.1807936511018401
          vf_explained_var: 0.6658445596694946
          vf_loss: 0.2073155367233528
    num_agent_steps_sampled: 3328668
    num_agent_steps_trained: 3328668
    num_steps_sampled: 3328668
    num_steps_trained: 3328668
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,333,104557,3328668,1.07152,6.71,-2.08,89.1786




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3338664
  custom_metrics: {}
  date: 2021-11-15_19-47-06
  done: false
  episode_len_mean: 89.49107142857143
  episode_media: {}
  episode_reward_max: 10.840000000000014
  episode_reward_mean: 1.5141071428571455
  episode_reward_min: -2.2499999999999996
  episodes_this_iter: 112
  episodes_total: 35910
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.646608543599773
          entropy_coeff: 0.01
          kl: 0.00598990960428905
          policy_loss: -0.021287733532934106
          total_loss: 0.15322112657933726
          vf_explained_var: 0.7181001901626587
          vf_loss: 0.18562346249480502
    num_agent_steps_sampled: 3338664
    num_agent_steps_trained: 3338664
    num_steps_sampled: 3338664
    num_steps_trained: 3338664
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,334,104880,3338664,1.51411,10.84,-2.25,89.4911


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3348660
  custom_metrics: {}
  date: 2021-11-15_19-52-19
  done: false
  episode_len_mean: 89.14414414414415
  episode_media: {}
  episode_reward_max: 13.050000000000011
  episode_reward_mean: 1.9452252252252282
  episode_reward_min: -2.1900000000000004
  episodes_this_iter: 111
  episodes_total: 36021
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6489329445056424
          entropy_coeff: 0.01
          kl: 0.007465982682097902
          policy_loss: -0.01694079822876578
          total_loss: 0.2047109103685993
          vf_explained_var: 0.7151663303375244
          vf_loss: 0.22900654138185275
    num_agent_steps_sampled: 3348660
    num_agent_steps_trained: 3348660
    num_steps_sampled: 3348660
    num_steps_trained: 3348660
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,335,105193,3348660,1.94523,13.05,-2.19,89.1441




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3358656
  custom_metrics: {}
  date: 2021-11-15_19-57-45
  done: false
  episode_len_mean: 89.76785714285714
  episode_media: {}
  episode_reward_max: 8.990000000000013
  episode_reward_mean: 1.5841964285714312
  episode_reward_min: -2.04
  episodes_this_iter: 112
  episodes_total: 36133
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.634156326045338
          entropy_coeff: 0.01
          kl: 0.006356464240534895
          policy_loss: -0.014120614958497195
          total_loss: 0.1609269052783712
          vf_explained_var: 0.7245543003082275
          vf_loss: 0.18509816160091222
    num_agent_steps_sampled: 3358656
    num_agent_steps_trained: 3358656
    num_steps_sampled: 3358656
    num_steps_trained: 3358656
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,336,105520,3358656,1.5842,8.99,-2.04,89.7679




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3368652
  custom_metrics: {}
  date: 2021-11-15_20-03-24
  done: false
  episode_len_mean: 89.36607142857143
  episode_media: {}
  episode_reward_max: 10.720000000000015
  episode_reward_mean: 1.5663392857142893
  episode_reward_min: -2.199999999999998
  episodes_this_iter: 112
  episodes_total: 36245
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6324684735037325
          entropy_coeff: 0.01
          kl: 0.006001925502865731
          policy_loss: -0.015315829193553863
          total_loss: 0.18716712766844365
          vf_explained_var: 0.7009267807006836
          vf_loss: 0.21342536316882085
    num_agent_steps_sampled: 3368652
    num_agent_steps_trained: 3368652
    num_steps_sampled: 3368652
    num_steps_trained: 3368652
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,337,105859,3368652,1.56634,10.72,-2.2,89.3661


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3378648
  custom_metrics: {}
  date: 2021-11-15_20-08-41
  done: false
  episode_len_mean: 90.39090909090909
  episode_media: {}
  episode_reward_max: 8.59000000000001
  episode_reward_mean: 1.5659090909090938
  episode_reward_min: -2.2199999999999984
  episodes_this_iter: 110
  episodes_total: 36355
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6242952497596415
          entropy_coeff: 0.01
          kl: 0.005861431537005077
          policy_loss: -0.018968686911985916
          total_loss: 0.175969879491191
          vf_explained_var: 0.7340930104255676
          vf_loss: 0.2061593117311788
    num_agent_steps_sampled: 3378648
    num_agent_steps_trained: 3378648
    num_steps_sampled: 3378648
    num_steps_trained: 3378648
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,338,106175,3378648,1.56591,8.59,-2.22,90.3909




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3388644
  custom_metrics: {}
  date: 2021-11-15_20-14-23
  done: false
  episode_len_mean: 88.73451327433628
  episode_media: {}
  episode_reward_max: 6.930000000000007
  episode_reward_mean: 1.7956637168141625
  episode_reward_min: -1.7100000000000009
  episodes_this_iter: 113
  episodes_total: 36468
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.631966711822738
          entropy_coeff: 0.01
          kl: 0.006275082498913332
          policy_loss: -0.016976267428129403
          total_loss: 0.19439547418529152
          vf_explained_var: 0.7576988935470581
          vf_loss: 0.22160906036997324
    num_agent_steps_sampled: 3388644
    num_agent_steps_trained: 3388644
    num_steps_sampled: 3388644
    num_steps_trained: 3388644
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,339,106518,3388644,1.79566,6.93,-1.71,88.7345




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3398640
  custom_metrics: {}
  date: 2021-11-15_20-19-49
  done: false
  episode_len_mean: 89.08035714285714
  episode_media: {}
  episode_reward_max: 7.030000000000009
  episode_reward_mean: 1.701785714285718
  episode_reward_min: -1.900000000000001
  episodes_this_iter: 112
  episodes_total: 36580
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.620554609380217
          entropy_coeff: 0.01
          kl: 0.006928409089168105
          policy_loss: -0.016079090319128117
          total_loss: 0.1870346361090644
          vf_explained_var: 0.7325940132141113
          vf_loss: 0.2115625177701123
    num_agent_steps_sampled: 3398640
    num_agent_steps_trained: 3398640
    num_steps_sampled: 3398640
    num_steps_trained: 3398640
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,340,106843,3398640,1.70179,7.03,-1.9,89.0804


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3408636
  custom_metrics: {}
  date: 2021-11-15_20-25-00
  done: false
  episode_len_mean: 90.4054054054054
  episode_media: {}
  episode_reward_max: 8.960000000000013
  episode_reward_mean: 1.450810810810814
  episode_reward_min: -2.12
  episodes_this_iter: 111
  episodes_total: 36691
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.624558960678231
          entropy_coeff: 0.01
          kl: 0.006474534010909022
          policy_loss: -0.01674258307629448
          total_loss: 0.17391189967986578
          vf_explained_var: 0.7322045564651489
          vf_loss: 0.20030655038829606
    num_agent_steps_sampled: 3408636
    num_agent_steps_trained: 3408636
    num_steps_sampled: 3408636
    num_steps_trained: 3408636
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,341,107154,3408636,1.45081,8.96,-2.12,90.4054




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3418632
  custom_metrics: {}
  date: 2021-11-15_20-30-25
  done: false
  episode_len_mean: 89.34821428571429
  episode_media: {}
  episode_reward_max: 9.020000000000012
  episode_reward_mean: 1.6614285714285748
  episode_reward_min: -2.2299999999999995
  episodes_this_iter: 112
  episodes_total: 36803
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6244756638494313
          entropy_coeff: 0.01
          kl: 0.006516584023926566
          policy_loss: -0.019584866031670034
          total_loss: 0.1573931059979189
          vf_explained_var: 0.7383706569671631
          vf_loss: 0.18652143673613095
    num_agent_steps_sampled: 3418632
    num_agent_steps_trained: 3418632
    num_steps_sampled: 3418632
    num_steps_trained: 3418632
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,342,107479,3418632,1.66143,9.02,-2.23,89.3482


Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3428628
  custom_metrics: {}
  date: 2021-11-15_20-35-35
  done: false
  episode_len_mean: 89.90090090090091
  episode_media: {}
  episode_reward_max: 10.410000000000013
  episode_reward_mean: 1.5821621621621653
  episode_reward_min: -2.059999999999999
  episodes_this_iter: 111
  episodes_total: 36914
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6209645968217115
          entropy_coeff: 0.01
          kl: 0.007183009546968386
          policy_loss: -0.013262492161180474
          total_loss: 0.20350292747100004
          vf_explained_var: 0.7028731107711792
          vf_loss: 0.22456579819783315
    num_agent_steps_sampled: 3428628
    num_agent_steps_trained: 3428628
    num_steps_sampled: 3428628
    num_steps_trained: 3428628
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,343,107790,3428628,1.58216,10.41,-2.06,89.9009




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3438624
  custom_metrics: {}
  date: 2021-11-15_20-41-02
  done: false
  episode_len_mean: 88.75221238938053
  episode_media: {}
  episode_reward_max: 10.440000000000014
  episode_reward_mean: 1.8920353982300921
  episode_reward_min: -2.1899999999999995
  episodes_this_iter: 113
  episodes_total: 37027
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.625249999608749
          entropy_coeff: 0.01
          kl: 0.006519722472342376
          policy_loss: -0.014883389908016428
          total_loss: 0.1912943413013465
          vf_explained_var: 0.7307884097099304
          vf_loss: 0.21572089516040352
    num_agent_steps_sampled: 3438624
    num_agent_steps_trained: 3438624
    num_steps_sampled: 3438624
    num_steps_trained: 3438624
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,344,108117,3438624,1.89204,10.44,-2.19,88.7522




Result for PPO_my_env_907c1_00000:
  agent_timesteps_total: 3448620
  custom_metrics: {}
  date: 2021-11-15_20-46-28
  done: false
  episode_len_mean: 88.30088495575221
  episode_media: {}
  episode_reward_max: 9.110000000000012
  episode_reward_mean: 1.26699115044248
  episode_reward_min: -1.9500000000000006
  episodes_this_iter: 113
  episodes_total: 37140
  experiment_id: 214763e727544e648f411667af87eede
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.562890625
          cur_lr: 5.000000000000001e-05
          entropy: 2.6276405351793666
          entropy_coeff: 0.01
          kl: 0.005606977914184025
          policy_loss: -0.018584450957580254
          total_loss: 0.1477483398318848
          vf_explained_var: 0.7441065907478333
          vf_loss: 0.17823912616835064
    num_agent_steps_sampled: 3448620
    num_agent_steps_trained: 3448620
    num_steps_sampled: 3448620
    num_steps_trained: 3448620
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_907c1_00000,RUNNING,192.168.3.5:154354,345,108442,3448620,1.26699,9.11,-1.95,88.3009


