In [1]:
import torch 
from torch import nn

import ray
from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

#from models import VisualEncoder
from train import *
from wrappers_2 import *



In [2]:
class VisualEncoder(nn.Module):
    """Convolutional image encoder that turns an RGB frame into a flat feature vector.

    Six 2x2, stride-2 convolutions (each followed by ELU) halve the spatial
    resolution at every stage while widening the channels from 3 to 512; the
    final activation map is flattened per sample. A 64x64 input therefore
    yields 512 features.
    """

    def __init__(self):
        super().__init__()

        # Channel count at the input and after each convolution stage.
        widths = (3, 32, 32, 64, 128, 256, 512)

        stages = []
        for in_channels, out_channels in zip(widths[:-1], widths[1:]):
            stages.append(nn.Conv2d(in_channels, out_channels, kernel_size=2, stride=2, padding=0))
            stages.append(nn.ELU())
        stages.append(nn.Flatten())

        # A single Sequential named ``cnn`` keeps parameter names of the form
        # ``cnn.<index>.weight`` / ``cnn.<index>.bias``.
        self.cnn = nn.Sequential(*stages)

    def forward(self, x):
        """Encode a batch of channel-first images ``x`` into flattened features."""
        return self.cnn(x)

In [3]:
class PositionalEncoder(nn.Module):
    """Fixed sinusoidal positional encoding for every cell of a 3-D grid.

    The ``d_model`` channels are split into three equal groups, one per grid
    axis. Within a group, even channels hold ``sin`` and odd channels hold
    ``cos`` of ``0.33 * position / wavelength``, where the wavelengths are
    ``10_000 ** (6 * k / d_model)`` for ``k = 0 .. d_model // 6 - 1``.

    Args:
        d_model: Encoding width; must be a multiple of 6 (three axes times a
            sin/cos pair).
        grid_shape: Sizes of the three grid axes. Defaults to ``(9, 11, 11)``.

    Raises:
        ValueError: If ``d_model`` is not a multiple of 6 or ``grid_shape``
            does not have exactly three entries.

    Attributes:
        pe: Float32 tensor of shape ``(prod(grid_shape), d_model)``. It is
            registered as a non-persistent buffer, so it follows the module
            across devices while the state dict stays empty.
    """

    def __init__(self, d_model=6, grid_shape=(9, 11, 11)):
        super().__init__()
        self.d_model = d_model
        if self.d_model % 6 != 0:
            raise ValueError("d_model must be divisible by 6!")
        self.grid_shape = tuple(grid_shape)
        if len(self.grid_shape) != 3:
            raise ValueError("grid_shape must contain exactly three axis sizes!")

        channels_per_axis = d_model // 3
        # One wavelength per sin/cos pair; the same set is reused for each axis.
        wavelengths = 10_000 ** (6 * np.arange(d_model // 6) / d_model)

        pe = np.zeros(self.grid_shape + (d_model,))
        for axis, size in enumerate(self.grid_shape):
            # angles[p, k] = 0.33 * p / wavelengths[k]
            angles = 0.33 * np.arange(size)[:, None] / wavelengths
            axis_code = np.zeros((size, channels_per_axis))
            axis_code[:, 0::2] = np.sin(angles)
            axis_code[:, 1::2] = np.cos(angles)

            # Broadcast this axis' code over the two other grid axes.
            broadcast_shape = [1, 1, 1, channels_per_axis]
            broadcast_shape[axis] = size
            start = axis * channels_per_axis
            pe[..., start:start + channels_per_axis] = axis_code.reshape(broadcast_shape)

        # Flatten the grid so row index = (x * Y + y) * Z + z.
        pe = pe.reshape(-1, d_model)
        self.register_buffer("pe", torch.tensor(pe).float(), persistent=False)

    def forward(self):
        """Return the precomputed ``(prod(grid_shape), d_model)`` encoding table."""
        return self.pe

In [4]:
class FusionNet(nn.Module):
    """Fuse target-grid features with image features into one policy embedding.

    Pipeline, as implemented in ``forward``:

    1. The 512-d image feature is projected to 60 values and split into
       ``60 // d_model`` tokens of width ``d_model``.
    2. The grid features are flattened into ``9 * 11 * 11`` tokens which
       cross-attend to the image tokens (residual connection).
    3. The grid tokens self-attend, with the positional table added to the
       queries and keys only (residual connection).
    4. The tokens are reshaped back into a grid, passed through a 3-D conv
       stack and max-pooled over the whole ``9 x 11 x 11`` volume (128 values).
    5. The raw image feature goes through its own MLP (128 values); both
       vectors are concatenated and mapped to a 256-d output.

    Args:
        d_model: Channel width of the grid tokens. It must be accepted by
            ``PositionalEncoder`` and must divide 60, the fixed output width of
            ``img_preproc``.
        num_heads: Number of heads for both attention layers.
    """

    def __init__(self, d_model=6, num_heads=1):
        super().__init__()
        self.d_model = d_model
        # The sinusoidal table is only the initial value: wrapping it in
        # nn.Parameter makes it part of ``parameters()``.
        self.pe = nn.Parameter(PositionalEncoder(d_model)())

        self.img_preproc = nn.Sequential(
            nn.Linear(512, 60),
            nn.ELU(),
        )

        self.cross_attn = nn.MultiheadAttention(d_model, num_heads, batch_first=True)
        self.self_attn = nn.MultiheadAttention(d_model, num_heads, batch_first=True)

        self.conv_net = nn.Sequential(
            # Input channels follow d_model (previously hard-coded to 6, which
            # broke any other d_model).
            nn.Conv3d(d_model, 8, kernel_size=3, padding=1),      # receptive field = 3
            nn.ELU(),
            nn.Conv3d(8, 16, kernel_size=3, padding=1),           # receptive field = 5
            nn.ELU(),
            nn.Conv3d(16, 32, kernel_size=3, padding=1),          # receptive field = 7
            nn.ELU(),
            nn.Conv3d(32, 64, kernel_size=3, padding=1),          # receptive field = 9
            nn.ELU(),
            nn.Conv3d(64, 128, kernel_size=3, padding=1),         # receptive field = 11
            nn.ELU(),
            nn.MaxPool3d(kernel_size=(9, 11, 11))
        )

        self.img_mlp = nn.Sequential(
            nn.Linear(512, 256),
            nn.ELU(),
            nn.Linear(256, 128),
            nn.ELU(),
        )

        self.mlp = nn.Sequential(
            nn.Linear(128 + 128, 256),
            nn.ELU(),
            nn.Linear(256, 256),
            nn.ELU(),
        )

    def forward(self, target_features, img_features):
        """Compute the fused embedding.

        Args:
            target_features: Tensor of shape ``(batch, d_model, 9, 11, 11)``.
            img_features: Tensor whose last dimension is 512.

        Returns:
            Tensor of shape ``(batch, 256)``.
        """
        batch_size = target_features.shape[0]

        img_tokens = self.img_preproc(img_features).reshape(batch_size, -1, self.d_model)
        # Channel-last, then one token per grid cell.
        grid_tokens = target_features.permute(0, 2, 3, 4, 1).reshape(batch_size, 9 * 11 * 11, self.d_model)

        # Residual additions are out-of-place: the added-to tensors are also
        # inputs of the attention layers, and mutating them in place can
        # invalidate values autograd saved for the backward pass.
        grid_tokens = grid_tokens + self.cross_attn(query=grid_tokens, key=img_tokens, value=img_tokens)[0]

        # Position information goes into queries and keys only; values stay position-free.
        positioned = grid_tokens + self.pe
        grid_tokens = grid_tokens + self.self_attn(query=positioned, key=positioned, value=grid_tokens)[0]

        grid = grid_tokens.reshape(batch_size, 9, 11, 11, self.d_model).permute(0, 4, 1, 2, 3)
        grid_summary = self.conv_net(grid).reshape(batch_size, -1)

        img_summary = self.img_mlp(img_features)

        features = torch.cat([grid_summary, img_summary], dim=1)
        features = self.mlp(features)

        return features

In [5]:
# Sanity check: build the fusion network with default settings and show how
# many parameters it holds (the cell's last expression is its output).
net = FusionNet()
sum(param.numel() for param in net.parameters())

628762

In [9]:
from torch.nn.functional import one_hot

class MyModelClass(TorchModelV2, nn.Module):
    """RLlib Torch model: pretrained visual encoder + target-grid encoder + FusionNet.

    The observation dict must provide ``'pov'`` (channel-last image batch,
    values scaled by 1/255 here) and ``'target_grid'`` (integer block ids that
    are one-hot encoded into 7 classes). The visual encoder is loaded from a
    checkpoint file and evaluated under ``torch.no_grad()``, so no gradient
    flows into it from this model's outputs.

    Args:
        obs_space, action_space, num_outputs, model_config, name: Forwarded
            unchanged to ``TorchModelV2.__init__``. ``action_space.n`` sets the
            width of the action head.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)

        self.visual_encoder = VisualEncoder()
        self.visual_encoder.load_state_dict(
            torch.load("/IGLU-Minecraft/models/AngelaCNN/encoder_weigths.pth", map_location=torch.device('cpu'))
        )
        # 1x1x1 convolution: maps the 7 one-hot block classes to the 6 channels FusionNet expects.
        self.target_encoder = nn.Sequential(
            nn.Conv3d(7, 6, kernel_size=1, stride=1, padding=0),
            nn.ELU(),
        )
        policy_hidden_dim = 256  # width of FusionNet's output
        self.policy_network = FusionNet()

        self.action_head = nn.Linear(policy_hidden_dim, action_space.n)
        self.value_head = nn.Linear(policy_hidden_dim, 1)
        self.last_value = None  # filled by forward(), read by value_function()

        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            self.visual_encoder.cuda()
            self.target_encoder.cuda()
            self.policy_network.cuda()
            self.action_head.cuda()
            self.value_head.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Return ``(action_logits, state)`` and cache the value estimate.

        ``state`` is passed through untouched and ``seq_lens`` is unused.
        """
        obs = input_dict['obs']
        # Tensor.cuda() returns a new tensor and leaves the original where it
        # was, so the inputs are moved explicitly to the device that holds the
        # model parameters (a no-op when they are already there).
        device = self.action_head.weight.device
        pov = obs['pov'].to(device).permute(0, 3, 1, 2).float() / 255.0
        target = one_hot(obs['target_grid'].to(device).long(), num_classes=7).permute(0, 4, 1, 2, 3).float()

        with torch.no_grad():
            visual_features = self.visual_encoder(pov)

        target_features = self.target_encoder(target)

        features = self.policy_network(target_features, visual_features)

        action = self.action_head(features)
        self.last_value = self.value_head(features).squeeze(1)
        return action, state

    @override(TorchModelV2)
    def value_function(self):
        """Return the value estimate computed by the most recent ``forward`` call."""
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

# Register the model under the name that the trainer configs reference via "custom_model".
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [6]:
class VisualObservationWrapper(ObsWrapper):
    """Observation wrapper exposing the POV image, inventory, compass and (optionally) target grid.

    Args:
        env: Environment to wrap.
        include_target: When true, ``'target_grid'`` is declared in
            ``observation_space`` and included in every observation. When
            false it is omitted from both, so observations always match the
            declared space.
    """

    def __init__(self, env, include_target=False):
        super().__init__(env)
        self.include_target = include_target
        spaces = {
            'pov': gym.spaces.Box(low=0, high=255, shape=(64, 64, 3)),
            'inventory': gym.spaces.Box(low=0.0, high=20.0, shape=(6,)),
            'compass': gym.spaces.Box(low=-180.0, high=180.0, shape=(1,))
        }
        if include_target:
            spaces['target_grid'] = gym.spaces.Box(low=0, high=6, shape=(9, 11, 11))
        self.observation_space = gym.spaces.Dict(spaces)

    def observation(self, obs, reward=None, done=None, info=None):
        """Convert a raw observation into the dict declared by ``observation_space``.

        The target grid is taken from ``info['target_grid']`` (and removed from
        ``info``) when present. If ``info`` is given without that key, the
        situation is logged, a reset is requested when the unwrapped env
        supports ``should_reset``, and the grid of the current task is used
        instead. With ``info=None`` the current task's grid is used directly.
        """
        if info is not None and 'target_grid' in info:
            target_grid = info.pop('target_grid')
        else:
            if info is not None:
                logger.error(f'info: {info}')
                if hasattr(self.unwrapped, 'should_reset'):
                    self.unwrapped.should_reset(True)
            target_grid = self.env.unwrapped.tasks.current.target_grid

        observation = {
            'pov': obs['pov'].astype(np.float32),
            'inventory': obs['inventory'],
            'compass': np.array([obs['compass']['angle'].item()]),
        }
        # Only emit the key that the observation space actually declares.
        if self.include_target:
            observation['target_grid'] = target_grid
        return observation

In [7]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0"

from iglu.tasks import CustomTasks

# Structures kept when filtering is enabled.
task_names = ['C3', 'C17', 'C32']
# The filter used to be disabled with a trailing `or True`; this explicit flag
# keeps that behaviour (every augmented task is used) while making it visible.
# Set it to False to train only on `task_names`.
USE_ALL_TASKS = True

augmented_chats = np.load("data/augmented_chats.npy")
augmented_tasks = np.load("data/augmented_targets.npy")
augmented_target_names = np.load("data/augmented_target_name.npy")

# Each task is a (chat, target grid) pair taken from the same row of both arrays.
tasks = []
for i in range(augmented_chats.shape[0]):
    if USE_ALL_TASKS or augmented_target_names[i] in task_names:
        task = (augmented_chats[i], augmented_tasks[i])
        tasks.append(task)
print("{} tasks in total.".format(len(tasks)))
    
class RewardWrapper(gym.RewardWrapper):
    """Reward shaping: penalise zero-reward steps and cancel rewards of magnitude one.

    * a reward of ``0`` becomes ``-0.01``;
    * a reward of ``+1`` or ``-1`` becomes ``0``;
    * any other reward is returned unchanged.
    """

    def __init__(self, env):
        super().__init__(env)

    def reward(self, rew):
        """Return the shaped version of ``rew``."""
        if rew == 0:
            return -0.01
        return 0 if abs(rew) == 1 else rew
    
def env_creator(env_config):
    """Build the fully wrapped IGLU environment.

    ``env_config`` is accepted but not read. The base environment is limited
    to 125 steps and uses the module-level ``tasks`` list as its task set.
    Wrappers are applied from the inside out: visual observations (with
    target grid), select-and-place, action discretisation and reward shaping.
    """
    base_env = gym.make('IGLUSilentBuilder-v0', max_steps=125)
    base_env.update_taskset(CustomTasks(tasks))

    observed_env = VisualObservationWrapper(base_env, include_target=True)
    discrete_env = Discretization(SelectAndPlace(observed_env), flat_action_space('human-level'))
    return RewardWrapper(discrete_env)

from ray.tune.registry import register_env
# Make the environment factory available under "my_env", the name used by the
# "env" entry of the trainer configs in this notebook.
register_env("my_env", env_creator)

from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

2850 tasks in total.


In [10]:
# PPO configuration for the stand-alone trainer built at the end of this cell.
# NOTE(review): the dict is repeated verbatim in the tune.run(...) cell further
# down; keep both copies in sync when changing a value.
config={
     "env": "my_env", 
     "framework": "torch",
     "num_gpus": 1,
     "num_workers": 3,
     "sgd_minibatch_size": 60,
     "clip_param": 0.2,
     "entropy_coeff": 0.01,
     "lambda": 0.95,
     "train_batch_size": 5_000,
     # "lr" and "gamma" are left at the trainer's defaults.
     #"lr": 1e-4,
     #"gamma": 0.99,
     "model": {
            # Specify our custom model from above.
            "custom_model": "my_torch_model",
            # Extra kwargs to be passed to your model's c'tor.
            "custom_model_config": {},
      },
     # NOTE(review): presumably only consumed when a W&B logger is attached
     # (as in the tune.run cell) — confirm it has any effect here.
     "logger_config": {
          "wandb": {
              "project": "IGLU-Minecraft",
              "name": "PPO (AUG ALL) pretrained (visual pretrained AngelaCNN + CrossAttn 3) 1->0"
          }
      }

}

# Build the trainer directly (without Tune) so a checkpoint can be restored into it.
agent = PPOTrainer(config, env="my_env")

2021-11-27 18:18:46,104	INFO trainable.py:109 -- Trainable.setup took 42.207 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


In [11]:
# Load trainer state from the iteration-960 checkpoint of the 2021-11-21 Tune run.
# The path is absolute and machine-specific.
agent.restore("/IGLU-Minecraft/checkpoints/all_tasks_aug_cross_attn3/PPO_2021-11-21_00-54-46/PPO_my_env_9f6d0_00000_0_2021-11-21_00-54-46/checkpoint_000960/checkpoint-960")

2021-11-27 18:20:52,648	INFO trainable.py:383 -- Restored on 192.168.3.5 from checkpoint: /IGLU-Minecraft/checkpoints/all_tasks_aug_cross_attn3/PPO_2021-11-21_00-54-46/PPO_my_env_9f6d0_00000_0_2021-11-21_00-54-46/checkpoint_000960/checkpoint-960
2021-11-27 18:20:52,648	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 960, '_timesteps_total': None, '_time_total': 502059.3793668747, '_episodes_total': 173197}


In [None]:
from torch.nn.functional import one_hot

class CrossAttn_LSTM(TorchModelV2, nn.Module):
    """RLlib Torch model: pretrained visual encoder + target-grid encoder + FusionNet.

    Despite the name, this class contains no recurrent layer: ``forward``
    returns ``state`` unchanged and the layers are the same as in
    ``MyModelClass``.

    The observation dict must provide ``'pov'`` (channel-last image batch,
    values scaled by 1/255 here) and ``'target_grid'`` (integer block ids that
    are one-hot encoded into 7 classes). The visual encoder is loaded from a
    checkpoint file and evaluated under ``torch.no_grad()``, so no gradient
    flows into it from this model's outputs.

    Args:
        obs_space, action_space, num_outputs, model_config, name: Forwarded
            unchanged to ``TorchModelV2.__init__``. ``action_space.n`` sets the
            width of the action head.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)

        self.visual_encoder = VisualEncoder()
        self.visual_encoder.load_state_dict(
            torch.load("/IGLU-Minecraft/models/AngelaCNN/encoder_weigths.pth", map_location=torch.device('cpu'))
        )
        # 1x1x1 convolution: maps the 7 one-hot block classes to the 6 channels FusionNet expects.
        self.target_encoder = nn.Sequential(
            nn.Conv3d(7, 6, kernel_size=1, stride=1, padding=0),
            nn.ELU(),
        )
        policy_hidden_dim = 256  # width of FusionNet's output
        self.policy_network = FusionNet()

        self.action_head = nn.Linear(policy_hidden_dim, action_space.n)
        self.value_head = nn.Linear(policy_hidden_dim, 1)
        self.last_value = None  # filled by forward(), read by value_function()

        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            self.visual_encoder.cuda()
            self.target_encoder.cuda()
            self.policy_network.cuda()
            self.action_head.cuda()
            self.value_head.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Return ``(action_logits, state)`` and cache the value estimate.

        ``state`` is passed through untouched and ``seq_lens`` is unused.
        """
        obs = input_dict['obs']
        # Tensor.cuda() returns a new tensor and leaves the original where it
        # was, so the inputs are moved explicitly to the device that holds the
        # model parameters (a no-op when they are already there).
        device = self.action_head.weight.device
        pov = obs['pov'].to(device).permute(0, 3, 1, 2).float() / 255.0
        target = one_hot(obs['target_grid'].to(device).long(), num_classes=7).permute(0, 4, 1, 2, 3).float()

        with torch.no_grad():
            visual_features = self.visual_encoder(pov)

        target_features = self.target_encoder(target)

        features = self.policy_network(target_features, visual_features)

        action = self.action_head(features)
        self.last_value = self.value_head(features).squeeze(1)
        return action, state

    @override(TorchModelV2)
    def value_function(self):
        """Return the value estimate computed by the most recent ``forward`` call."""
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

# Register the model under its own name.
# NOTE(review): the trainer configs in this notebook still select "my_torch_model", not this name.
ModelCatalog.register_custom_model("CrossAttn_LSTM", CrossAttn_LSTM)

In [None]:
from ray.tune.integration.wandb import WandbLogger

# Continue PPO training under Tune, starting from the iteration-400 checkpoint
# given in `restore`. Checkpoints are written every 5 iterations and at the
# end, keeping at most 100 of them under `local_dir`; results are also sent to
# Weights & Biases through WandbLogger.
# NOTE(review): the config repeats the `config` dict of the earlier trainer
# cell and still selects "my_torch_model", not the "CrossAttn_LSTM" model
# registered just above — confirm this is intended.
analysis = tune.run(PPOTrainer, 
         config={
             "env": "my_env", 
             "framework": "torch",
             "num_gpus": 1,
             "num_workers": 3,
             "sgd_minibatch_size": 60,
             "clip_param": 0.2,
             "entropy_coeff": 0.01,
             "lambda": 0.95,
             "train_batch_size": 5_000,
             #"lr": 1e-4,
             #"gamma": 0.99,
             "model": {
                    # Specify our custom model from above.
                    "custom_model": "my_torch_model",
                    # Extra kwargs to be passed to your model's c'tor.
                    "custom_model_config": {},
              },
             "logger_config": {
                  "wandb": {
                      "project": "IGLU-Minecraft",
                      "name": "PPO (AUG ALL) pretrained (visual pretrained AngelaCNN + CrossAttn 3) 1->0"
                  }
              }

        },
        loggers=[WandbLogger],
        local_dir="/IGLU-Minecraft/checkpoints/all_tasks_aug_cross_attn3",
        keep_checkpoints_num=100,
        checkpoint_freq=5,
        checkpoint_at_end=True,
        restore="/IGLU-Minecraft/checkpoints/all_tasks_aug_cross_attn3/PPO_2021-11-18_22-19-59/PPO_my_env_ab24a_00000_0_2021-11-18_22-20-00/checkpoint_000400/checkpoint-400",
        )



Trial name,status,loc
PPO_my_env_9f6d0_00000,PENDING,


2021-11-21 00:54:47,033	INFO trainable.py:76 -- Checkpoint size is 10703071 bytes
2021-11-21 00:54:47,043	INFO wandb.py:170 -- Already logged into W&B.
2021-11-21 00:54:47,058	ERROR syncer.py:72 -- Log sync requires rsync to be installed.
[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.7 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2m[36m(pid=133291)[0m 2021-11-21 00:54:50,672	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=133291)[0m 2021-11-21 00:54:50,672	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Trial name,status,loc
PPO_my_env_9f6d0_00000,RUNNING,


[2m[36m(pid=133291)[0m 2021-11-21 00:55:33,664	INFO trainable.py:109 -- Trainable.setup took 45.601 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[2m[36m(pid=133291)[0m 2021-11-21 00:55:33,700	INFO trainable.py:383 -- Restored on 192.168.3.5 from checkpoint: /IGLU-Minecraft/checkpoints/all_tasks_aug_cross_attn3/PPO_2021-11-21_00-54-46/PPO_my_env_9f6d0_00000_0_2021-11-21_00-54-46/tmp76z18i28restore_from_object/checkpoint-400
[2m[36m(pid=133291)[0m 2021-11-21 00:55:33,700	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 400, '_timesteps_total': None, '_time_total': 194817.3635854721, '_episodes_total': 66731}


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3408546
  custom_metrics: {}
  date: 2021-11-21_01-06-09
  done: false
  episode_len_mean: 49.585
  episode_media: {}
  episode_reward_max: 15.670000000000005
  episode_reward_mean: 4.310000000000003
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 200
  episodes_total: 66931
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.200193782575638
          entropy_coeff: 0.01
          kl: 0.03092679460174876
          policy_loss: -0.07231750708771986
          total_loss: 0.052229352827667885
          vf_explained_var: 0.9009210467338562
          vf_loss: 0.14036343848299102
    num_agent_steps_sampled: 3408546
    num_agent_steps_trained: 3408546
    num_steps_sampled: 3408546
    num_steps_trained: 3408546
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,401,195453,3408546,4.31,15.67,-0.53,49.585




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3418542
  custom_metrics: {}
  date: 2021-11-21_01-15-36
  done: false
  episode_len_mean: 49.504950495049506
  episode_media: {}
  episode_reward_max: 15.680000000000005
  episode_reward_mean: 4.8373267326732705
  episode_reward_min: -0.5000000000000004
  episodes_this_iter: 202
  episodes_total: 67133
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.1679479569077014
          entropy_coeff: 0.01
          kl: 0.038529437175046165
          policy_loss: -0.07817128649217321
          total_loss: 0.061509842705507536
          vf_explained_var: 0.9329590797424316
          vf_loss: 0.14980177771960232
    num_agent_steps_sampled: 3418542
    num_agent_steps_trained: 3418542
    num_steps_sampled: 3418542
    num_steps_trained: 3418542
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,402,196020,3418542,4.83733,15.68,-0.5,49.505




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3428538
  custom_metrics: {}
  date: 2021-11-21_01-24-51
  done: false
  episode_len_mean: 48.97549019607843
  episode_media: {}
  episode_reward_max: 11.720000000000004
  episode_reward_mean: 4.600784313725494
  episode_reward_min: -0.5500000000000003
  episodes_this_iter: 204
  episodes_total: 67337
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45000000000000007
          cur_lr: 5.000000000000001e-05
          entropy: 2.1755959026545404
          entropy_coeff: 0.01
          kl: 0.030961752294377986
          policy_loss: -0.08398848792983253
          total_loss: 0.026495607597979573
          vf_explained_var: 0.9504941701889038
          vf_loss: 0.11830726745164388
    num_agent_steps_sampled: 3428538
    num_agent_steps_trained: 3428538
    num_steps_sampled: 3428538
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,403,196575,3428538,4.60078,11.72,-0.55,48.9755




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3438534
  custom_metrics: {}
  date: 2021-11-21_01-34-00
  done: false
  episode_len_mean: 49.03431372549019
  episode_media: {}
  episode_reward_max: 13.710000000000004
  episode_reward_mean: 4.579754901960787
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 204
  episodes_total: 67541
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.1690342714987607
          entropy_coeff: 0.01
          kl: 0.029013735609475146
          policy_loss: -0.0833084141061499
          total_loss: 0.03912782540817088
          vf_explained_var: 0.9443672895431519
          vf_loss: 0.12454231013179229
    num_agent_steps_sampled: 3438534
    num_agent_steps_trained: 3438534
    num_steps_sampled: 3438534
    num_steps_trained: 3438

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,404,197124,3438534,4.57975,13.71,-0.5,49.0343




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3448530
  custom_metrics: {}
  date: 2021-11-21_01-43-09
  done: false
  episode_len_mean: 49.53465346534654
  episode_media: {}
  episode_reward_max: 15.630000000000006
  episode_reward_mean: 4.523960396039607
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 202
  episodes_total: 67743
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 2.1675691311857306
          entropy_coeff: 0.01
          kl: 0.026228705960480308
          policy_loss: -0.08076940090092491
          total_loss: 0.05896018733879886
          vf_explained_var: 0.9401130080223083
          vf_loss: 0.13484871268074167
    num_agent_steps_sampled: 3448530
    num_agent_steps_trained: 3448530
    num_steps_sampled: 3448530
    num_steps_trained: 3448530
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,405,197673,3448530,4.52396,15.63,-0.5,49.5347




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3458526
  custom_metrics: {}
  date: 2021-11-21_01-52-17
  done: false
  episode_len_mean: 49.6865671641791
  episode_media: {}
  episode_reward_max: 11.620000000000005
  episode_reward_mean: 4.6980099502487604
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 201
  episodes_total: 67944
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000003
          cur_lr: 5.000000000000001e-05
          entropy: 2.1767602116228586
          entropy_coeff: 0.01
          kl: 0.02140068157684134
          policy_loss: -0.07523903867283176
          total_loss: 0.0717563625402279
          vf_explained_var: 0.9458786249160767
          vf_loss: 0.13626071846834478
    num_agent_steps_sampled: 3458526
    num_agent_steps_trained: 3458526
    num_steps_sampled: 3458526
    num_steps_trained: 34585

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,406,198221,3458526,4.69801,11.62,-0.54,49.6866




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3468522
  custom_metrics: {}
  date: 2021-11-21_02-01-28
  done: false
  episode_len_mean: 49.15686274509804
  episode_media: {}
  episode_reward_max: 13.670000000000005
  episode_reward_mean: 4.74338235294118
  episode_reward_min: -0.5800000000000003
  episodes_this_iter: 204
  episodes_total: 68148
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.170133454277812
          entropy_coeff: 0.01
          kl: 0.01672114028795331
          policy_loss: -0.07353135058588396
          total_loss: 0.06870607189968511
          vf_explained_var: 0.9439539313316345
          vf_loss: 0.1258459076817413
    num_agent_steps_sampled: 3468522
    num_agent_steps_trained: 3468522
    num_steps_sampled: 3468522
    num_steps_trained: 3468522

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,407,198771,3468522,4.74338,13.67,-0.58,49.1569


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3478518
  custom_metrics: {}
  date: 2021-11-21_02-10-26
  done: false
  episode_len_mean: 48.12019230769231
  episode_media: {}
  episode_reward_max: 13.590000000000007
  episode_reward_mean: 4.663894230769235
  episode_reward_min: -0.5700000000000003
  episodes_this_iter: 208
  episodes_total: 68356
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.15771398319298
          entropy_coeff: 0.01
          kl: 0.01640385570533209
          policy_loss: -0.0741542227734013
          total_loss: 0.1023397530894405
          vf_explained_var: 0.9350979924201965
          vf_loss: 0.16070108078897044
    num_agent_steps_sampled: 3478518
    num_agent_steps_trained: 3478518
    num_steps_sampled: 3478518
    num_steps_trained: 3478518


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,408,199310,3478518,4.66389,13.59,-0.57,48.1202




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3488514
  custom_metrics: {}
  date: 2021-11-21_02-19-36
  done: false
  episode_len_mean: 48.300970873786405
  episode_media: {}
  episode_reward_max: 13.650000000000006
  episode_reward_mean: 4.563786407766993
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 206
  episodes_total: 68562
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.165860636358759
          entropy_coeff: 0.01
          kl: 0.015618529875685223
          policy_loss: -0.07537195493406841
          total_loss: 0.05714551004621244
          vf_explained_var: 0.9443585872650146
          vf_loss: 0.11859510741832416
    num_agent_steps_sampled: 3488514
    num_agent_steps_trained: 3488514
    num_steps_sampled: 3488514
    num_steps_trained: 34

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,409,199860,3488514,4.56379,13.65,-0.48,48.301




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3498510
  custom_metrics: {}
  date: 2021-11-21_02-29-03
  done: false
  episode_len_mean: 47.98557692307692
  episode_media: {}
  episode_reward_max: 11.670000000000003
  episode_reward_mean: 4.838509615384618
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 208
  episodes_total: 68770
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.152287675434327
          entropy_coeff: 0.01
          kl: 0.016354095882487076
          policy_loss: -0.07485868171051076
          total_loss: 0.07348306436391115
          vf_explained_var: 0.9405243992805481
          vf_loss: 0.13260794739439022
    num_agent_steps_sampled: 3498510
    num_agent_steps_trained: 3498510
    num_steps_sampled: 3498510
    num_steps_trained: 349

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,410,200427,3498510,4.83851,11.67,-0.48,47.9856


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3508506
  custom_metrics: {}
  date: 2021-11-21_02-38-00
  done: false
  episode_len_mean: 48.853658536585364
  episode_media: {}
  episode_reward_max: 13.670000000000005
  episode_reward_mean: 4.722975609756101
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 205
  episodes_total: 68975
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1789592105461413
          entropy_coeff: 0.01
          kl: 0.01627043230497233
          policy_loss: -0.07538058222081567
          total_loss: 0.07148870322167955
          vf_explained_var: 0.9498631954193115
          vf_loss: 0.13159279875638688
    num_agent_steps_sampled: 3508506
    num_agent_steps_trained: 3508506
    num_steps_sampled: 3508506
    num_steps_trained: 35

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,411,200964,3508506,4.72298,13.67,-0.48,48.8537




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3518502
  custom_metrics: {}
  date: 2021-11-21_02-47-40
  done: false
  episode_len_mean: 47.44549763033175
  episode_media: {}
  episode_reward_max: 13.750000000000004
  episode_reward_mean: 4.524075829383889
  episode_reward_min: -0.6100000000000003
  episodes_this_iter: 211
  episodes_total: 69186
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1914987323753325
          entropy_coeff: 0.01
          kl: 0.016158363204840192
          policy_loss: -0.08308332698168772
          total_loss: 0.05767535509269995
          vf_explained_var: 0.9490606784820557
          vf_loss: 0.12586289715320134
    num_agent_steps_sampled: 3518502
    num_agent_steps_trained: 3518502
    num_steps_sampled: 3518502
    num_steps_trained: 351

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,412,201544,3518502,4.52408,13.75,-0.61,47.4455




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3528498
  custom_metrics: {}
  date: 2021-11-21_02-56-55
  done: false
  episode_len_mean: 48.08173076923077
  episode_media: {}
  episode_reward_max: 13.690000000000005
  episode_reward_mean: 4.626971153846157
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 208
  episodes_total: 69394
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1485744424613125
          entropy_coeff: 0.01
          kl: 0.01587085827799107
          policy_loss: -0.0742857746197959
          total_loss: 0.06210954498303444
          vf_explained_var: 0.9554295539855957
          vf_loss: 0.12172526374625708
    num_agent_steps_sampled: 3528498
    num_agent_steps_trained: 3528498
    num_steps_sampled: 3528498
    num_steps_trained: 35284

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,413,202099,3528498,4.62697,13.69,-0.51,48.0817




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3538494
  custom_metrics: {}
  date: 2021-11-21_03-06-17
  done: false
  episode_len_mean: 48.14009661835749
  episode_media: {}
  episode_reward_max: 13.670000000000005
  episode_reward_mean: 4.677004830917878
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 207
  episodes_total: 69601
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1854875765890482
          entropy_coeff: 0.01
          kl: 0.01632410055980715
          policy_loss: -0.07096352096559713
          total_loss: 0.0916743326570644
          vf_explained_var: 0.9260156154632568
          vf_loss: 0.1473043874892627
    num_agent_steps_sampled: 3538494
    num_agent_steps_trained: 3538494
    num_steps_sampled: 3538494
    num_steps_trained: 35384

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,414,202661,3538494,4.677,13.67,-0.49,48.1401


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3548490
  custom_metrics: {}
  date: 2021-11-21_03-15-16
  done: false
  episode_len_mean: 48.39613526570049
  episode_media: {}
  episode_reward_max: 13.720000000000004
  episode_reward_mean: 4.984444444444447
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 207
  episodes_total: 69808
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.15248876178121
          entropy_coeff: 0.01
          kl: 0.01589445122469998
          policy_loss: -0.07227696069037474
          total_loss: 0.08001332461193304
          vf_explained_var: 0.9598656296730042
          vf_loss: 0.13760562605751925
    num_agent_steps_sampled: 3548490
    num_agent_steps_trained: 3548490
    num_steps_sampled: 3548490
    num_steps_trained: 354849

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,415,203199,3548490,4.98444,13.72,-0.54,48.3961




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3558486
  custom_metrics: {}
  date: 2021-11-21_03-24-34
  done: false
  episode_len_mean: 47.33175355450237
  episode_media: {}
  episode_reward_max: 13.650000000000006
  episode_reward_mean: 4.5258293838862595
  episode_reward_min: -0.4400000000000002
  episodes_this_iter: 211
  episodes_total: 70019
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.17581731981063
          entropy_coeff: 0.01
          kl: 0.015884735031890154
          policy_loss: -0.08029936735626093
          total_loss: 0.06522661381381456
          vf_explained_var: 0.9475409388542175
          vf_loss: 0.13109674138422817
    num_agent_steps_sampled: 3558486
    num_agent_steps_trained: 3558486
    num_steps_sampled: 3558486
    num_steps_trained: 3558

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,416,203758,3558486,4.52583,13.65,-0.44,47.3318




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3568482
  custom_metrics: {}
  date: 2021-11-21_03-34-05
  done: false
  episode_len_mean: 47.80861244019139
  episode_media: {}
  episode_reward_max: 11.780000000000003
  episode_reward_mean: 5.213157894736845
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 209
  episodes_total: 70228
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1238779293485432
          entropy_coeff: 0.01
          kl: 0.01599631675376845
          policy_loss: -0.0766507928718403
          total_loss: 0.06821537824747588
          vf_explained_var: 0.9560762643814087
          vf_loss: 0.12966334038165334
    num_agent_steps_sampled: 3568482
    num_agent_steps_trained: 3568482
    num_steps_sampled: 3568482
    num_steps_trained: 35684

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,417,204328,3568482,5.21316,11.78,-0.54,47.8086




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3578478
  custom_metrics: {}
  date: 2021-11-21_03-43-33
  done: false
  episode_len_mean: 48.56310679611651
  episode_media: {}
  episode_reward_max: 15.630000000000006
  episode_reward_mean: 4.933398058252431
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 206
  episodes_total: 70434
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1543777179526518
          entropy_coeff: 0.01
          kl: 0.015814321739185164
          policy_loss: -0.07747842032025451
          total_loss: 0.06513637920731588
          vf_explained_var: 0.944657564163208
          vf_loss: 0.12813157401146763
    num_agent_steps_sampled: 3578478
    num_agent_steps_trained: 3578478
    num_steps_sampled: 3578478
    num_steps_trained: 3578

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,418,204897,3578478,4.9334,15.63,-0.52,48.5631




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3588474
  custom_metrics: {}
  date: 2021-11-21_03-52-53
  done: false
  episode_len_mean: 49.17156862745098
  episode_media: {}
  episode_reward_max: 13.720000000000004
  episode_reward_mean: 4.656960784313728
  episode_reward_min: -0.5700000000000003
  episodes_this_iter: 204
  episodes_total: 70638
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.162404299261101
          entropy_coeff: 0.01
          kl: 0.015237738402496112
          policy_loss: -0.07741685931888245
          total_loss: 0.05785802046829263
          vf_explained_var: 0.9218599796295166
          vf_loss: 0.12218544798174753
    num_agent_steps_sampled: 3588474
    num_agent_steps_trained: 3588474
    num_steps_sampled: 3588474
    num_steps_trained: 3588

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,419,205457,3588474,4.65696,13.72,-0.57,49.1716




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3598470
  custom_metrics: {}
  date: 2021-11-21_04-02-02
  done: false
  episode_len_mean: 49.32673267326733
  episode_media: {}
  episode_reward_max: 15.670000000000005
  episode_reward_mean: 4.903861386138618
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 202
  episodes_total: 70840
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.151966855420645
          entropy_coeff: 0.01
          kl: 0.016313315406282454
          policy_loss: -0.07429225414086092
          total_loss: 0.06832960664478269
          vf_explained_var: 0.9426409006118774
          vf_loss: 0.1269777568229231
    num_agent_steps_sampled: 3598470
    num_agent_steps_trained: 3598470
    num_steps_sampled: 3598470
    num_steps_trained: 3598

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,420,206006,3598470,4.90386,15.67,-0.49,49.3267




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3608466
  custom_metrics: {}
  date: 2021-11-21_04-11-11
  done: false
  episode_len_mean: 49.48019801980198
  episode_media: {}
  episode_reward_max: 13.780000000000003
  episode_reward_mean: 4.884158415841588
  episode_reward_min: -0.4400000000000002
  episodes_this_iter: 202
  episodes_total: 71042
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.157275920220647
          entropy_coeff: 0.01
          kl: 0.0154388976841473
          policy_loss: -0.07858186429050959
          total_loss: 0.060862771479429305
          vf_explained_var: 0.9529964327812195
          vf_loss: 0.12584565469750156
    num_agent_steps_sampled: 3608466
    num_agent_steps_trained: 3608466
    num_steps_sampled: 3608466
    num_steps_trained: 36084

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,421,206554,3608466,4.88416,13.78,-0.44,49.4802


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3618462
  custom_metrics: {}
  date: 2021-11-21_04-20-07
  done: false
  episode_len_mean: 49.351485148514854
  episode_media: {}
  episode_reward_max: 11.660000000000005
  episode_reward_mean: 4.843613861386142
  episode_reward_min: -0.4600000000000002
  episodes_this_iter: 202
  episodes_total: 71244
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.18199263935587
          entropy_coeff: 0.01
          kl: 0.014819877214665643
          policy_loss: -0.07768205114608795
          total_loss: 0.04953177942923094
          vf_explained_var: 0.9587133526802063
          vf_loss: 0.11527222370921474
    num_agent_steps_sampled: 3618462
    num_agent_steps_trained: 3618462
    num_steps_sampled: 3618462
    num_steps_trained: 3618

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,422,207090,3618462,4.84361,11.66,-0.46,49.3515




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3628458
  custom_metrics: {}
  date: 2021-11-21_04-29-33
  done: false
  episode_len_mean: 48.87317073170732
  episode_media: {}
  episode_reward_max: 15.570000000000007
  episode_reward_mean: 4.768000000000003
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 205
  episodes_total: 71449
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1775937391093456
          entropy_coeff: 0.01
          kl: 0.016352150176937974
          policy_loss: -0.0746065208265562
          total_loss: 0.07324952365108628
          vf_explained_var: 0.9495317339897156
          vf_loss: 0.13237973775679104
    num_agent_steps_sampled: 3628458
    num_agent_steps_trained: 3628458
    num_steps_sampled: 3628458
    num_steps_trained: 3628

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,423,207656,3628458,4.768,15.57,-0.54,48.8732




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3638454
  custom_metrics: {}
  date: 2021-11-21_04-38-42
  done: false
  episode_len_mean: 49.26600985221675
  episode_media: {}
  episode_reward_max: 13.690000000000005
  episode_reward_mean: 4.60546798029557
  episode_reward_min: -0.5900000000000003
  episodes_this_iter: 203
  episodes_total: 71652
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.170955098800391
          entropy_coeff: 0.01
          kl: 0.014945778557310364
          policy_loss: -0.07890647319146085
          total_loss: 0.04989142625149926
          vf_explained_var: 0.9480834603309631
          vf_loss: 0.11645909679247568
    num_agent_steps_sampled: 3638454
    num_agent_steps_trained: 3638454
    num_steps_sampled: 3638454
    num_steps_trained: 36384

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,424,208206,3638454,4.60547,13.69,-0.59,49.266


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3648450
  custom_metrics: {}
  date: 2021-11-21_04-47-37
  done: false
  episode_len_mean: 50.4263959390863
  episode_media: {}
  episode_reward_max: 17.689999999999994
  episode_reward_mean: 5.019340101522847
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 197
  episodes_total: 71849
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1565069990943235
          entropy_coeff: 0.01
          kl: 0.015748957073002386
          policy_loss: -0.07410296170072757
          total_loss: 0.0655954224281034
          vf_explained_var: 0.9479078650474548
          vf_loss: 0.12538536036944384
    num_agent_steps_sampled: 3648450
    num_agent_steps_trained: 3648450
    num_steps_sampled: 3648450
    num_steps_trained: 36484

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,425,208740,3648450,5.01934,17.69,-0.54,50.4264




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3658446
  custom_metrics: {}
  date: 2021-11-21_04-57-02
  done: false
  episode_len_mean: 48.858536585365854
  episode_media: {}
  episode_reward_max: 13.680000000000005
  episode_reward_mean: 5.048097560975614
  episode_reward_min: -0.4200000000000002
  episodes_this_iter: 205
  episodes_total: 72054
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.154323020326086
          entropy_coeff: 0.01
          kl: 0.015215965829666138
          policy_loss: -0.07493956084089656
          total_loss: 0.06301036129969646
          vf_explained_var: 0.956092119216919
          vf_loss: 0.124829278920448
    num_agent_steps_sampled: 3658446
    num_agent_steps_trained: 3658446
    num_steps_sampled: 3658446
    num_steps_trained: 365844

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,426,209305,3658446,5.0481,13.68,-0.42,48.8585




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3668442
  custom_metrics: {}
  date: 2021-11-21_05-06-10
  done: false
  episode_len_mean: 49.791044776119406
  episode_media: {}
  episode_reward_max: 13.660000000000005
  episode_reward_mean: 4.916019900497516
  episode_reward_min: -0.46000000000000024
  episodes_this_iter: 201
  episodes_total: 72255
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.169194159402426
          entropy_coeff: 0.01
          kl: 0.015193557251391265
          policy_loss: -0.07619675773966297
          total_loss: 0.05865903365418226
          vf_explained_var: 0.9533183574676514
          vf_loss: 0.12193490897995386
    num_agent_steps_sampled: 3668442
    num_agent_steps_trained: 3668442
    num_steps_sampled: 3668442
    num_steps_trained: 36

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,427,209854,3668442,4.91602,13.66,-0.46,49.791




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3678438
  custom_metrics: {}
  date: 2021-11-21_05-15-19
  done: false
  episode_len_mean: 49.475247524752476
  episode_media: {}
  episode_reward_max: 17.66
  episode_reward_mean: 4.814108910891093
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 202
  episodes_total: 72457
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.164518617793738
          entropy_coeff: 0.01
          kl: 0.01518456445443593
          policy_loss: -0.07220587928995774
          total_loss: 0.06542152929019451
          vf_explained_var: 0.9472803473472595
          vf_loss: 0.12468025697503672
    num_agent_steps_sampled: 3678438
    num_agent_steps_trained: 3678438
    num_steps_sampled: 3678438
    num_steps_trained: 3678438
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,428,210403,3678438,4.81411,17.66,-0.51,49.4752




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3688434
  custom_metrics: {}
  date: 2021-11-21_05-24-55
  done: false
  episode_len_mean: 49.80597014925373
  episode_media: {}
  episode_reward_max: 11.740000000000004
  episode_reward_mean: 4.644975124378114
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 201
  episodes_total: 72658
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1717574285455497
          entropy_coeff: 0.01
          kl: 0.014797702146072867
          policy_loss: -0.07578228854025615
          total_loss: 0.05319261284008338
          vf_explained_var: 0.9421861171722412
          vf_loss: 0.11698145944827112
    num_agent_steps_sampled: 3688434
    num_agent_steps_trained: 3688434
    num_steps_sampled: 3688434
    num_steps_trained: 368

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,429,210979,3688434,4.64498,11.74,-0.53,49.806


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3698430
  custom_metrics: {}
  date: 2021-11-21_05-33-51
  done: false
  episode_len_mean: 49.36945812807882
  episode_media: {}
  episode_reward_max: 13.600000000000007
  episode_reward_mean: 4.568719211822664
  episode_reward_min: -0.6100000000000003
  episodes_this_iter: 203
  episodes_total: 72861
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1760070472357262
          entropy_coeff: 0.01
          kl: 0.0147388385402182
          policy_loss: -0.07872667708116828
          total_loss: 0.04573870369015595
          vf_explained_var: 0.9521480202674866
          vf_loss: 0.11264853315562548
    num_agent_steps_sampled: 3698430
    num_agent_steps_trained: 3698430
    num_steps_sampled: 3698430
    num_steps_trained: 36984

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,430,211514,3698430,4.56872,13.6,-0.61,49.3695




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3708426
  custom_metrics: {}
  date: 2021-11-21_05-43-13
  done: false
  episode_len_mean: 48.916666666666664
  episode_media: {}
  episode_reward_max: 13.660000000000005
  episode_reward_mean: 4.393088235294122
  episode_reward_min: -0.46000000000000024
  episodes_this_iter: 204
  episodes_total: 73065
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.191637620437576
          entropy_coeff: 0.01
          kl: 0.014540408624818403
          policy_loss: -0.07735767245462757
          total_loss: 0.046923784144521606
          vf_explained_var: 0.9399353265762329
          vf_loss: 0.11307296391368944
    num_agent_steps_sampled: 3708426
    num_agent_steps_trained: 3708426
    num_steps_sampled: 3708426
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,431,212076,3708426,4.39309,13.66,-0.46,48.9167




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3718422
  custom_metrics: {}
  date: 2021-11-21_05-53-01
  done: false
  episode_len_mean: 49.08866995073892
  episode_media: {}
  episode_reward_max: 17.629999999999992
  episode_reward_mean: 4.684285714285718
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 203
  episodes_total: 73268
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.192750055004794
          entropy_coeff: 0.01
          kl: 0.01605895866799508
          policy_loss: -0.07873974230572213
          total_loss: 0.0650182657104775
          vf_explained_var: 0.9408880472183228
          vf_loss: 0.12910119116962718
    num_agent_steps_sampled: 3718422
    num_agent_steps_trained: 3718422
    num_steps_sampled: 3718422
    num_steps_trained: 371842

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,432,212664,3718422,4.68429,17.63,-0.54,49.0887




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3728418
  custom_metrics: {}
  date: 2021-11-21_06-02-08
  done: false
  episode_len_mean: 49.885572139303484
  episode_media: {}
  episode_reward_max: 13.730000000000004
  episode_reward_mean: 4.585124378109457
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 201
  episodes_total: 73469
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.178925366645836
          entropy_coeff: 0.01
          kl: 0.014897294823903841
          policy_loss: -0.07824255513429183
          total_loss: 0.0545672998240866
          vf_explained_var: 0.9453961253166199
          vf_loss: 0.12066120754143814
    num_agent_steps_sampled: 3728418
    num_agent_steps_trained: 3728418
    num_steps_sampled: 3728418
    num_steps_trained: 3728

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,433,213212,3728418,4.58512,13.73,-0.52,49.8856




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3738414
  custom_metrics: {}
  date: 2021-11-21_06-11-48
  done: false
  episode_len_mean: 49.34158415841584
  episode_media: {}
  episode_reward_max: 13.670000000000005
  episode_reward_mean: 4.796485148514855
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 202
  episodes_total: 73671
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1801374736081165
          entropy_coeff: 0.01
          kl: 0.014986835996619754
          policy_loss: -0.08371758996768203
          total_loss: 0.03702923084129436
          vf_explained_var: 0.9429067969322205
          vf_loss: 0.10840630856853158
    num_agent_steps_sampled: 3738414
    num_agent_steps_trained: 3738414
    num_steps_sampled: 3738414
    num_steps_trained: 373

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,434,213791,3738414,4.79649,13.67,-0.51,49.3416




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3748410
  custom_metrics: {}
  date: 2021-11-21_06-21-02
  done: false
  episode_len_mean: 48.71844660194175
  episode_media: {}
  episode_reward_max: 13.620000000000006
  episode_reward_mean: 4.475097087378645
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 206
  episodes_total: 73877
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1940031495917753
          entropy_coeff: 0.01
          kl: 0.014906730649934518
          policy_loss: -0.07687353200875856
          total_loss: 0.059289514228850794
          vf_explained_var: 0.9191843271255493
          vf_loss: 0.12414368049940094
    num_agent_steps_sampled: 3748410
    num_agent_steps_trained: 3748410
    num_steps_sampled: 3748410
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,435,214345,3748410,4.4751,13.62,-0.49,48.7184




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3758406
  custom_metrics: {}
  date: 2021-11-21_06-30-15
  done: false
  episode_len_mean: 49.02450980392157
  episode_media: {}
  episode_reward_max: 13.660000000000005
  episode_reward_mean: 4.806421568627454
  episode_reward_min: -0.4200000000000002
  episodes_this_iter: 204
  episodes_total: 74081
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1700214669168236
          entropy_coeff: 0.01
          kl: 0.015351684290202058
          policy_loss: -0.07640338075700757
          total_loss: 0.06829640462798037
          vf_explained_var: 0.9455568194389343
          vf_loss: 0.131426941905915
    num_agent_steps_sampled: 3758406
    num_agent_steps_trained: 3758406
    num_steps_sampled: 3758406
    num_steps_trained: 37584

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,436,214898,3758406,4.80642,13.66,-0.42,49.0245




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3768402
  custom_metrics: {}
  date: 2021-11-21_06-39-36
  done: false
  episode_len_mean: 49.48756218905473
  episode_media: {}
  episode_reward_max: 13.680000000000005
  episode_reward_mean: 4.717810945273635
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 201
  episodes_total: 74282
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1673317788834554
          entropy_coeff: 0.01
          kl: 0.015407339343018887
          policy_loss: -0.0780470002667733
          total_loss: 0.05940907627660767
          vf_explained_var: 0.9505224227905273
          vf_loss: 0.12402954846901079
    num_agent_steps_sampled: 3768402
    num_agent_steps_trained: 3768402
    num_steps_sampled: 3768402
    num_steps_trained: 3768

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,437,215459,3768402,4.71781,13.68,-0.52,49.4876


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3778398
  custom_metrics: {}
  date: 2021-11-21_06-48-31
  done: false
  episode_len_mean: 49.65346534653465
  episode_media: {}
  episode_reward_max: 15.620000000000006
  episode_reward_mean: 5.068910891089113
  episode_reward_min: -0.46000000000000024
  episodes_this_iter: 202
  episodes_total: 74484
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1677443190990204
          entropy_coeff: 0.01
          kl: 0.014943438398932007
          policy_loss: -0.08041458166114893
          total_loss: 0.0611392560683932
          vf_explained_var: 0.9395925402641296
          vf_loss: 0.12918825934932804
    num_agent_steps_sampled: 3778398
    num_agent_steps_trained: 3778398
    num_steps_sampled: 3778398
    num_steps_trained: 377

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,438,215994,3778398,5.06891,15.62,-0.46,49.6535




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3788394
  custom_metrics: {}
  date: 2021-11-21_06-57-40
  done: false
  episode_len_mean: 49.41871921182266
  episode_media: {}
  episode_reward_max: 13.670000000000005
  episode_reward_mean: 4.542709359605914
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 203
  episodes_total: 74687
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.176641442330487
          entropy_coeff: 0.01
          kl: 0.015110874714723223
          policy_loss: -0.07845088801069765
          total_loss: 0.049745643788220884
          vf_explained_var: 0.9467602968215942
          vf_loss: 0.11553848418566103
    num_agent_steps_sampled: 3788394
    num_agent_steps_trained: 3788394
    num_steps_sampled: 3788394
    num_steps_trained: 37

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,439,216544,3788394,4.54271,13.67,-0.49,49.4187




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3798390
  custom_metrics: {}
  date: 2021-11-21_07-06-49
  done: false
  episode_len_mean: 49.815
  episode_media: {}
  episode_reward_max: 13.530000000000008
  episode_reward_mean: 4.588850000000003
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 200
  episodes_total: 74887
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1510483770246007
          entropy_coeff: 0.01
          kl: 0.015266891697966474
          policy_loss: -0.07591270228674118
          total_loss: 0.05053323126509884
          vf_explained_var: 0.9399974942207336
          vf_loss: 0.1131765279575741
    num_agent_steps_sampled: 3798390
    num_agent_steps_trained: 3798390
    num_steps_sampled: 3798390
    num_steps_trained: 3798390
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,440,217092,3798390,4.58885,13.53,-0.51,49.815




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3808386
  custom_metrics: {}
  date: 2021-11-21_07-16-24
  done: false
  episode_len_mean: 48.81951219512195
  episode_media: {}
  episode_reward_max: 13.690000000000005
  episode_reward_mean: 5.00975609756098
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 205
  episodes_total: 75092
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.137221434844067
          entropy_coeff: 0.01
          kl: 0.014887165850254862
          policy_loss: -0.08071920625861456
          total_loss: 0.058152687048843556
          vf_explained_var: 0.9575442671775818
          vf_loss: 0.12632928256657963
    num_agent_steps_sampled: 3808386
    num_agent_steps_trained: 3808386
    num_steps_sampled: 3808386
    num_steps_trained: 380

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,441,217667,3808386,5.00976,13.69,-0.48,48.8195




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3818382
  custom_metrics: {}
  date: 2021-11-21_07-25-35
  done: false
  episode_len_mean: 49.12807881773399
  episode_media: {}
  episode_reward_max: 13.680000000000005
  episode_reward_mean: 4.659014778325126
  episode_reward_min: -0.47000000000000025
  episodes_this_iter: 203
  episodes_total: 75295
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1652499095024353
          entropy_coeff: 0.01
          kl: 0.015150077279667471
          policy_loss: -0.07177340430656089
          total_loss: 0.06672366590998707
          vf_explained_var: 0.9519090056419373
          vf_loss: 0.12563579902885086
    num_agent_steps_sampled: 3818382
    num_agent_steps_trained: 3818382
    num_steps_sampled: 3818382
    num_steps_trained: 38

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,442,218218,3818382,4.65901,13.68,-0.47,49.1281


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3828378
  custom_metrics: {}
  date: 2021-11-21_07-34-34
  done: false
  episode_len_mean: 48.58048780487805
  episode_media: {}
  episode_reward_max: 11.730000000000004
  episode_reward_mean: 4.825609756097564
  episode_reward_min: -0.4100000000000002
  episodes_this_iter: 205
  episodes_total: 75500
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1738638749802446
          entropy_coeff: 0.01
          kl: 0.014926989223198957
          policy_loss: -0.07828427869043732
          total_loss: 0.055948745065252006
          vf_explained_var: 0.9405080080032349
          vf_loss: 0.12196611421403426
    num_agent_steps_sampled: 3828378
    num_agent_steps_trained: 3828378
    num_steps_sampled: 3828378
    num_steps_trained: 38

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,443,218757,3828378,4.82561,11.73,-0.41,48.5805




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3838374
  custom_metrics: {}
  date: 2021-11-21_07-43-46
  done: false
  episode_len_mean: 48.81553398058252
  episode_media: {}
  episode_reward_max: 13.690000000000003
  episode_reward_mean: 4.49961165048544
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 206
  episodes_total: 75706
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.208290288534509
          entropy_coeff: 0.01
          kl: 0.014397431547881999
          policy_loss: -0.07967661782153826
          total_loss: 0.04630718429677461
          vf_explained_var: 0.941429853439331
          vf_loss: 0.11526755484524202
    num_agent_steps_sampled: 3838374
    num_agent_steps_trained: 3838374
    num_steps_sampled: 3838374
    num_steps_trained: 383837

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,444,219309,3838374,4.49961,13.69,-0.53,48.8155


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3848370
  custom_metrics: {}
  date: 2021-11-21_07-52-47
  done: false
  episode_len_mean: 48.40096618357488
  episode_media: {}
  episode_reward_max: 13.590000000000007
  episode_reward_mean: 4.720338164251211
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 207
  episodes_total: 75913
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.18846899464906
          entropy_coeff: 0.01
          kl: 0.01545114646966887
          policy_loss: -0.07774906791174475
          total_loss: 0.060823778962647006
          vf_explained_var: 0.9482285380363464
          vf_loss: 0.12525789241903054
    num_agent_steps_sampled: 3848370
    num_agent_steps_trained: 3848370
    num_steps_sampled: 3848370
    num_steps_trained: 3848

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,445,219850,3848370,4.72034,13.59,-0.48,48.401




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3858366
  custom_metrics: {}
  date: 2021-11-21_08-02-23
  done: false
  episode_len_mean: 48.34634146341463
  episode_media: {}
  episode_reward_max: 11.700000000000001
  episode_reward_mean: 4.275609756097564
  episode_reward_min: -0.4600000000000002
  episodes_this_iter: 205
  episodes_total: 76118
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1835218633274476
          entropy_coeff: 0.01
          kl: 0.015106304193725658
          policy_loss: -0.0793708154776875
          total_loss: 0.054918331139206966
          vf_explained_var: 0.9377947449684143
          vf_loss: 0.12171031594618661
    num_agent_steps_sampled: 3858366
    num_agent_steps_trained: 3858366
    num_steps_sampled: 3858366
    num_steps_trained: 385

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,446,220426,3858366,4.27561,11.7,-0.46,48.3463




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3868362
  custom_metrics: {}
  date: 2021-11-21_08-11-34
  done: false
  episode_len_mean: 48.47572815533981
  episode_media: {}
  episode_reward_max: 11.690000000000005
  episode_reward_mean: 4.527961165048548
  episode_reward_min: -0.46000000000000024
  episodes_this_iter: 206
  episodes_total: 76324
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.19343691336582
          entropy_coeff: 0.01
          kl: 0.015098819832634664
          policy_loss: -0.07794383818105548
          total_loss: 0.049112360006501946
          vf_explained_var: 0.9537760615348816
          vf_loss: 0.11459356703193312
    num_agent_steps_sampled: 3868362
    num_agent_steps_trained: 3868362
    num_steps_sampled: 3868362
    num_steps_trained: 386

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,447,220976,3868362,4.52796,11.69,-0.46,48.4757


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3878358
  custom_metrics: {}
  date: 2021-11-21_08-20-34
  done: false
  episode_len_mean: 48.66990291262136
  episode_media: {}
  episode_reward_max: 13.610000000000005
  episode_reward_mean: 4.583543689320392
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 206
  episodes_total: 76530
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1758462815878383
          entropy_coeff: 0.01
          kl: 0.014881400286513182
          policy_loss: -0.07971215124405746
          total_loss: 0.05837734304044292
          vf_explained_var: 0.9424855709075928
          vf_loss: 0.12594626493121772
    num_agent_steps_sampled: 3878358
    num_agent_steps_trained: 3878358
    num_steps_sampled: 3878358
    num_steps_trained: 387

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,448,221517,3878358,4.58354,13.61,-0.51,48.6699




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3888354
  custom_metrics: {}
  date: 2021-11-21_08-30-00
  done: false
  episode_len_mean: 47.84688995215311
  episode_media: {}
  episode_reward_max: 13.680000000000005
  episode_reward_mean: 4.961578947368425
  episode_reward_min: -0.3900000000000002
  episodes_this_iter: 209
  episodes_total: 76739
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.144270350703274
          entropy_coeff: 0.01
          kl: 0.014950247080612544
          policy_loss: -0.07481407008337088
          total_loss: 0.05866095710232211
          vf_explained_var: 0.9309216141700745
          vf_loss: 0.12085919809961565
    num_agent_steps_sampled: 3888354
    num_agent_steps_trained: 3888354
    num_steps_sampled: 3888354
    num_steps_trained: 3888

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,449,222083,3888354,4.96158,13.68,-0.39,47.8469




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3898350
  custom_metrics: {}
  date: 2021-11-21_08-39-14
  done: false
  episode_len_mean: 48.25480769230769
  episode_media: {}
  episode_reward_max: 13.620000000000008
  episode_reward_mean: 4.490480769230772
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 208
  episodes_total: 76947
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1900250899504465
          entropy_coeff: 0.01
          kl: 0.014698088676053494
          policy_loss: -0.07666521762786238
          total_loss: 0.048252533993358354
          vf_explained_var: 0.9450881481170654
          vf_loss: 0.11333391822090963
    num_agent_steps_sampled: 3898350
    num_agent_steps_trained: 3898350
    num_steps_sampled: 3898350
    num_steps_trained: 38

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,450,222637,3898350,4.49048,13.62,-0.53,48.2548


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3908346
  custom_metrics: {}
  date: 2021-11-21_08-48-17
  done: false
  episode_len_mean: 47.89423076923077
  episode_media: {}
  episode_reward_max: 13.630000000000004
  episode_reward_mean: 4.641634615384619
  episode_reward_min: -0.4600000000000002
  episodes_this_iter: 208
  episodes_total: 77155
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1896198179348407
          entropy_coeff: 0.01
          kl: 0.01428284648131432
          policy_loss: -0.0817582230183119
          total_loss: 0.034385946385506565
          vf_explained_var: 0.9587282538414001
          vf_loss: 0.10550225709712724
    num_agent_steps_sampled: 3908346
    num_agent_steps_trained: 3908346
    num_steps_sampled: 3908346
    num_steps_trained: 3908

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,451,223179,3908346,4.64163,13.63,-0.46,47.8942




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3918342
  custom_metrics: {}
  date: 2021-11-21_08-57-32
  done: false
  episode_len_mean: 48.066985645933016
  episode_media: {}
  episode_reward_max: 11.720000000000004
  episode_reward_mean: 4.624736842105267
  episode_reward_min: -0.4100000000000002
  episodes_this_iter: 209
  episodes_total: 77364
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.180290333765099
          entropy_coeff: 0.01
          kl: 0.015282080299251002
          policy_loss: -0.07910561631137952
          total_loss: 0.049906834139943956
          vf_explained_var: 0.9520941376686096
          vf_loss: 0.11600086339450176
    num_agent_steps_sampled: 3918342
    num_agent_steps_trained: 3918342
    num_steps_sampled: 3918342
    num_steps_trained: 39

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,452,223734,3918342,4.62474,11.72,-0.41,48.067




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3928338
  custom_metrics: {}
  date: 2021-11-21_09-06-59
  done: false
  episode_len_mean: 47.7799043062201
  episode_media: {}
  episode_reward_max: 15.610000000000007
  episode_reward_mean: 4.369569377990434
  episode_reward_min: -0.47000000000000036
  episodes_this_iter: 209
  episodes_total: 77573
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.187475083487101
          entropy_coeff: 0.01
          kl: 0.014389981256148966
          policy_loss: -0.07714908462668998
          total_loss: 0.030231811385810686
          vf_explained_var: 0.9351078271865845
          vf_loss: 0.09647346990983409
    num_agent_steps_sampled: 3928338
    num_agent_steps_trained: 3928338
    num_steps_sampled: 3928338
    num_steps_trained: 392

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,453,224301,3928338,4.36957,15.61,-0.47,47.7799


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3938334
  custom_metrics: {}
  date: 2021-11-21_09-15-58
  done: false
  episode_len_mean: 48.35436893203884
  episode_media: {}
  episode_reward_max: 11.700000000000005
  episode_reward_mean: 5.00577669902913
  episode_reward_min: -0.47000000000000025
  episodes_this_iter: 206
  episodes_total: 77779
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.171466203602442
          entropy_coeff: 0.01
          kl: 0.01568050691330901
          policy_loss: -0.0742391711511823
          total_loss: 0.06299582371732149
          vf_explained_var: 0.9561370611190796
          vf_loss: 0.1232275011174074
    num_agent_steps_sampled: 3938334
    num_agent_steps_trained: 3938334
    num_steps_sampled: 3938334
    num_steps_trained: 3938334

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,454,224840,3938334,5.00578,11.7,-0.47,48.3544




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3948330
  custom_metrics: {}
  date: 2021-11-21_09-25-21
  done: false
  episode_len_mean: 47.803827751196174
  episode_media: {}
  episode_reward_max: 13.640000000000006
  episode_reward_mean: 4.750095693779907
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 209
  episodes_total: 77988
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.17520494946993
          entropy_coeff: 0.01
          kl: 0.015886194047871914
          policy_loss: -0.07408989692390225
          total_loss: 0.0654841427776956
          vf_explained_var: 0.9488665461540222
          vf_loss: 0.12513535094669978
    num_agent_steps_sampled: 3948330
    num_agent_steps_trained: 3948330
    num_steps_sampled: 3948330
    num_steps_trained: 39483

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,455,225404,3948330,4.7501,13.64,-0.5,47.8038




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3958326
  custom_metrics: {}
  date: 2021-11-21_09-34-48
  done: false
  episode_len_mean: 47.628571428571426
  episode_media: {}
  episode_reward_max: 13.600000000000009
  episode_reward_mean: 4.612809523809528
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 210
  episodes_total: 78198
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.172380560349269
          entropy_coeff: 0.01
          kl: 0.015610906610594987
          policy_loss: -0.07758702663367327
          total_loss: 0.07326398441443187
          vf_explained_var: 0.9388830661773682
          vf_loss: 0.13701121878738226
    num_agent_steps_sampled: 3958326
    num_agent_steps_trained: 3958326
    num_steps_sampled: 3958326
    num_steps_trained: 395

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,456,225970,3958326,4.61281,13.6,-0.52,47.6286


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3968322
  custom_metrics: {}
  date: 2021-11-21_09-43-49
  done: false
  episode_len_mean: 47.99519230769231
  episode_media: {}
  episode_reward_max: 15.630000000000006
  episode_reward_mean: 4.811634615384619
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 208
  episodes_total: 78406
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1478600516376725
          entropy_coeff: 0.01
          kl: 0.015018142751111923
          policy_loss: -0.07621690357502164
          total_loss: 0.058781430228266186
          vf_explained_var: 0.9419919848442078
          vf_loss: 0.12226372580062672
    num_agent_steps_sampled: 3968322
    num_agent_steps_trained: 3968322
    num_steps_sampled: 3968322
    num_steps_trained: 39

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,457,226512,3968322,4.81163,15.63,-0.54,47.9952




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3978318
  custom_metrics: {}
  date: 2021-11-21_09-53-01
  done: false
  episode_len_mean: 48.359223300970875
  episode_media: {}
  episode_reward_max: 13.600000000000007
  episode_reward_mean: 4.58708737864078
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 206
  episodes_total: 78612
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1724299400207028
          entropy_coeff: 0.01
          kl: 0.015261400474669971
          policy_loss: -0.07275253369648838
          total_loss: 0.06917464963405884
          vf_explained_var: 0.9212479591369629
          vf_loss: 0.12888410410630596
    num_agent_steps_sampled: 3978318
    num_agent_steps_trained: 3978318
    num_steps_sampled: 3978318
    num_steps_trained: 397

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,458,227064,3978318,4.58709,13.6,-0.52,48.3592




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3988314
  custom_metrics: {}
  date: 2021-11-21_10-02-14
  done: false
  episode_len_mean: 48.34928229665072
  episode_media: {}
  episode_reward_max: 15.660000000000005
  episode_reward_mean: 4.9864593301435445
  episode_reward_min: -0.46000000000000024
  episodes_this_iter: 209
  episodes_total: 78821
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.148751169108004
          entropy_coeff: 0.01
          kl: 0.01579171966252731
          policy_loss: -0.07685947990136763
          total_loss: 0.08077194371939968
          vf_explained_var: 0.9450455904006958
          vf_loss: 0.14314342165341504
    num_agent_steps_sampled: 3988314
    num_agent_steps_trained: 3988314
    num_steps_sampled: 3988314
    num_steps_trained: 398

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,459,227616,3988314,4.98646,15.66,-0.46,48.3493




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 3998310
  custom_metrics: {}
  date: 2021-11-21_10-11-26
  done: false
  episode_len_mean: 48.71078431372549
  episode_media: {}
  episode_reward_max: 13.680000000000005
  episode_reward_mean: 4.602843137254905
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 204
  episodes_total: 79025
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1771307125149004
          entropy_coeff: 0.01
          kl: 0.016433182655074135
          policy_loss: -0.07767185374369988
          total_loss: 0.07271465442616931
          vf_explained_var: 0.9481737613677979
          vf_loss: 0.13472097002411165
    num_agent_steps_sampled: 3998310
    num_agent_steps_trained: 3998310
    num_steps_sampled: 3998310
    num_steps_trained: 39

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,460,228169,3998310,4.60284,13.68,-0.48,48.7108




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4008306
  custom_metrics: {}
  date: 2021-11-21_10-20-40
  done: false
  episode_len_mean: 48.10576923076923
  episode_media: {}
  episode_reward_max: 11.89
  episode_reward_mean: 4.2964423076923115
  episode_reward_min: -0.46000000000000024
  episodes_this_iter: 208
  episodes_total: 79233
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.191100105415865
          entropy_coeff: 0.01
          kl: 0.015152404257824104
          policy_loss: -0.07307548963298648
          total_loss: 0.058989973298374965
          vf_explained_var: 0.9385526776313782
          vf_loss: 0.11945739093897714
    num_agent_steps_sampled: 4008306
    num_agent_steps_trained: 4008306
    num_steps_sampled: 4008306
    num_steps_trained: 4008306
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,461,228723,4008306,4.29644,11.89,-0.46,48.1058




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4018302
  custom_metrics: {}
  date: 2021-11-21_10-30-12
  done: false
  episode_len_mean: 48.04807692307692
  episode_media: {}
  episode_reward_max: 11.640000000000006
  episode_reward_mean: 4.792307692307696
  episode_reward_min: -0.4500000000000002
  episodes_this_iter: 208
  episodes_total: 79441
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.161437741652071
          entropy_coeff: 0.01
          kl: 0.015329923025311354
          policy_loss: -0.07551355490158856
          total_loss: 0.06197961888611027
          vf_explained_var: 0.9496448636054993
          vf_loss: 0.12418406879993148
    num_agent_steps_sampled: 4018302
    num_agent_steps_trained: 4018302
    num_steps_sampled: 4018302
    num_steps_trained: 4018

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,462,229295,4018302,4.79231,11.64,-0.45,48.0481




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4028298
  custom_metrics: {}
  date: 2021-11-21_10-39-24
  done: false
  episode_len_mean: 48.16346153846154
  episode_media: {}
  episode_reward_max: 17.630000000000006
  episode_reward_mean: 4.799086538461543
  episode_reward_min: -0.5500000000000003
  episodes_this_iter: 208
  episodes_total: 79649
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1544633122572456
          entropy_coeff: 0.01
          kl: 0.015648200525440064
          policy_loss: -0.07878873434442447
          total_loss: 0.06927181120944693
          vf_explained_var: 0.9546553492546082
          vf_loss: 0.1339566208614807
    num_agent_steps_sampled: 4028298
    num_agent_steps_trained: 4028298
    num_steps_sampled: 4028298
    num_steps_trained: 4028

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,463,229846,4028298,4.79909,17.63,-0.55,48.1635




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4038294
  custom_metrics: {}
  date: 2021-11-21_10-48-34
  done: false
  episode_len_mean: 48.1207729468599
  episode_media: {}
  episode_reward_max: 15.580000000000005
  episode_reward_mean: 4.209565217391307
  episode_reward_min: -0.5900000000000003
  episodes_this_iter: 207
  episodes_total: 79856
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.203166263146573
          entropy_coeff: 0.01
          kl: 0.014767991150775896
          policy_loss: -0.0760730107778895
          total_loss: 0.06942823770887153
          vf_explained_var: 0.9183452725410461
          vf_loss: 0.13388957981699068
    num_agent_steps_sampled: 4038294
    num_agent_steps_trained: 4038294
    num_steps_sampled: 4038294
    num_steps_trained: 403829

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,464,230397,4038294,4.20957,15.58,-0.59,48.1208




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4048290
  custom_metrics: {}
  date: 2021-11-21_10-57-50
  done: false
  episode_len_mean: 47.24528301886792
  episode_media: {}
  episode_reward_max: 15.710000000000003
  episode_reward_mean: 4.7851886792452865
  episode_reward_min: -0.4399999999999995
  episodes_this_iter: 212
  episodes_total: 80068
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1785489819136012
          entropy_coeff: 0.01
          kl: 0.014715844978092571
          policy_loss: -0.07681541187288472
          total_loss: 0.05657357359122258
          vf_explained_var: 0.9529961347579956
          vf_loss: 0.12164994015210648
    num_agent_steps_sampled: 4048290
    num_agent_steps_trained: 4048290
    num_steps_sampled: 4048290
    num_steps_trained: 40

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,465,230952,4048290,4.78519,15.71,-0.44,47.2453




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4058286
  custom_metrics: {}
  date: 2021-11-21_11-07-03
  done: false
  episode_len_mean: 47.99519230769231
  episode_media: {}
  episode_reward_max: 13.690000000000007
  episode_reward_mean: 4.9448076923076965
  episode_reward_min: -0.45000000000000023
  episodes_this_iter: 208
  episodes_total: 80276
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1630608386064627
          entropy_coeff: 0.01
          kl: 0.014682616715896147
          policy_loss: -0.07678403697681567
          total_loss: 0.05177134378127046
          vf_explained_var: 0.954839289188385
          vf_loss: 0.11673715240976597
    num_agent_steps_sampled: 4058286
    num_agent_steps_trained: 4058286
    num_steps_sampled: 4058286
    num_steps_trained: 40

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,466,231505,4058286,4.94481,13.69,-0.45,47.9952




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4068282
  custom_metrics: {}
  date: 2021-11-21_11-16-27
  done: false
  episode_len_mean: 47.91866028708134
  episode_media: {}
  episode_reward_max: 11.750000000000004
  episode_reward_mean: 4.662105263157898
  episode_reward_min: -0.4200000000000002
  episodes_this_iter: 209
  episodes_total: 80485
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1577921257200967
          entropy_coeff: 0.01
          kl: 0.01516821458785881
          policy_loss: -0.07599838756391382
          total_loss: 0.06277546551567478
          vf_explained_var: 0.9577246308326721
          vf_loss: 0.1257966842017058
    num_agent_steps_sampled: 4068282
    num_agent_steps_trained: 4068282
    num_steps_sampled: 4068282
    num_steps_trained: 40682

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,467,232069,4068282,4.66211,11.75,-0.42,47.9187




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4078278
  custom_metrics: {}
  date: 2021-11-21_11-25-38
  done: false
  episode_len_mean: 48.37864077669903
  episode_media: {}
  episode_reward_max: 13.640000000000006
  episode_reward_mean: 4.744902912621363
  episode_reward_min: -0.45000000000000023
  episodes_this_iter: 206
  episodes_total: 80691
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1539613596406806
          entropy_coeff: 0.01
          kl: 0.01500184047053943
          policy_loss: -0.07175303969919017
          total_loss: 0.07643823916287476
          vf_explained_var: 0.9509563446044922
          vf_loss: 0.13555482271058208
    num_agent_steps_sampled: 4078278
    num_agent_steps_trained: 4078278
    num_steps_sampled: 4078278
    num_steps_trained: 407

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,468,232620,4078278,4.7449,13.64,-0.45,48.3786




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4088274
  custom_metrics: {}
  date: 2021-11-21_11-34-51
  done: false
  episode_len_mean: 48.14903846153846
  episode_media: {}
  episode_reward_max: 15.690000000000005
  episode_reward_mean: 4.8512019230769265
  episode_reward_min: -0.4300000000000002
  episodes_this_iter: 208
  episodes_total: 80899
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1541817574137188
          entropy_coeff: 0.01
          kl: 0.014634402282067516
          policy_loss: -0.07554044471872479
          total_loss: 0.06581749241112919
          vf_explained_var: 0.9424532055854797
          vf_loss: 0.12956075552966909
    num_agent_steps_sampled: 4088274
    num_agent_steps_trained: 4088274
    num_steps_sampled: 4088274
    num_steps_trained: 40

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,469,233173,4088274,4.8512,15.69,-0.43,48.149




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4098270
  custom_metrics: {}
  date: 2021-11-21_11-44-05
  done: false
  episode_len_mean: 48.1256038647343
  episode_media: {}
  episode_reward_max: 17.640000000000004
  episode_reward_mean: 4.7556038647343035
  episode_reward_min: -0.42000000000000015
  episodes_this_iter: 207
  episodes_total: 81106
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1397353886839854
          entropy_coeff: 0.01
          kl: 0.015216789220793812
          policy_loss: -0.07237690763814393
          total_loss: 0.07494023395920375
          vf_explained_var: 0.9472171068191528
          vf_loss: 0.13404874614763437
    num_agent_steps_sampled: 4098270
    num_agent_steps_trained: 4098270
    num_steps_sampled: 4098270
    num_steps_trained: 40

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,470,233727,4098270,4.7556,17.64,-0.42,48.1256


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4108266
  custom_metrics: {}
  date: 2021-11-21_11-53-06
  done: false
  episode_len_mean: 48.34782608695652
  episode_media: {}
  episode_reward_max: 15.660000000000005
  episode_reward_mean: 4.829371980676332
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 207
  episodes_total: 81313
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.146537560607535
          entropy_coeff: 0.01
          kl: 0.015594265049337464
          policy_loss: -0.07568198000665882
          total_loss: 0.07294344629093297
          vf_explained_var: 0.9372114539146423
          vf_loss: 0.13456511595436038
    num_agent_steps_sampled: 4108266
    num_agent_steps_trained: 4108266
    num_steps_sampled: 4108266
    num_steps_trained: 410

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,471,234268,4108266,4.82937,15.66,-0.48,48.3478




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4118262
  custom_metrics: {}
  date: 2021-11-21_12-02-33
  done: false
  episode_len_mean: 48.43689320388349
  episode_media: {}
  episode_reward_max: 13.680000000000005
  episode_reward_mean: 4.7845145631068
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 206
  episodes_total: 81519
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1507322008351246
          entropy_coeff: 0.01
          kl: 0.015275915322458158
          policy_loss: -0.07461787324260133
          total_loss: 0.06278819654421039
          vf_explained_var: 0.9244546294212341
          vf_loss: 0.12411294625679323
    num_agent_steps_sampled: 4118262
    num_agent_steps_trained: 4118262
    num_steps_sampled: 4118262
    num_steps_trained: 41182

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,472,234835,4118262,4.78451,13.68,-0.5,48.4369




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4128258
  custom_metrics: {}
  date: 2021-11-21_12-12-24
  done: false
  episode_len_mean: 48.17307692307692
  episode_media: {}
  episode_reward_max: 13.600000000000007
  episode_reward_mean: 4.757692307692311
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 208
  episodes_total: 81727
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.163319004371942
          entropy_coeff: 0.01
          kl: 0.015418615225512864
          policy_loss: -0.07970399278478743
          total_loss: 0.0631381672357694
          vf_explained_var: 0.9513898491859436
          vf_loss: 0.12934981613189925
    num_agent_steps_sampled: 4128258
    num_agent_steps_trained: 4128258
    num_steps_sampled: 4128258
    num_steps_trained: 4128

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,473,235426,4128258,4.75769,13.6,-0.49,48.1731


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4138254
  custom_metrics: {}
  date: 2021-11-21_12-21-24
  done: false
  episode_len_mean: 48.08653846153846
  episode_media: {}
  episode_reward_max: 13.580000000000007
  episode_reward_mean: 4.761875000000003
  episode_reward_min: -0.46000000000000024
  episodes_this_iter: 208
  episodes_total: 81935
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.162716326153422
          entropy_coeff: 0.01
          kl: 0.014782865803374472
          policy_loss: -0.07791348079213989
          total_loss: 0.05364975722920232
          vf_explained_var: 0.9566330909729004
          vf_loss: 0.1195131830153912
    num_agent_steps_sampled: 4138254
    num_agent_steps_trained: 4138254
    num_steps_sampled: 4138254
    num_steps_trained: 4138

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,474,235966,4138254,4.76188,13.58,-0.46,48.0865


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4148250
  custom_metrics: {}
  date: 2021-11-21_12-30-23
  done: false
  episode_len_mean: 48.650485436893206
  episode_media: {}
  episode_reward_max: 17.679999999999993
  episode_reward_mean: 4.597524271844663
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 206
  episodes_total: 82141
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1683902518337512
          entropy_coeff: 0.01
          kl: 0.015825240248420903
          policy_loss: -0.07551504226589659
          total_loss: 0.06832541643885766
          vf_explained_var: 0.9334242939949036
          vf_loss: 0.1294724843764012
    num_agent_steps_sampled: 4148250
    num_agent_steps_trained: 4148250
    num_steps_sampled: 4148250
    num_steps_trained: 41

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,475,236505,4148250,4.59752,17.68,-0.49,48.6505




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4158246
  custom_metrics: {}
  date: 2021-11-21_12-40-03
  done: false
  episode_len_mean: 48.02884615384615
  episode_media: {}
  episode_reward_max: 13.740000000000004
  episode_reward_mean: 4.530721153846158
  episode_reward_min: -0.46000000000000024
  episodes_this_iter: 208
  episodes_total: 82349
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1517241909561386
          entropy_coeff: 0.01
          kl: 0.01450249708076564
          policy_loss: -0.07655714787092675
          total_loss: 0.056320012745302496
          vf_explained_var: 0.9515449404716492
          vf_loss: 0.12135589985021701
    num_agent_steps_sampled: 4158246
    num_agent_steps_trained: 4158246
    num_steps_sampled: 4158246
    num_steps_trained: 41

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,476,237085,4158246,4.53072,13.74,-0.46,48.0288




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4168242
  custom_metrics: {}
  date: 2021-11-21_12-49-15
  done: false
  episode_len_mean: 48.07692307692308
  episode_media: {}
  episode_reward_max: 17.68999999999998
  episode_reward_mean: 4.665096153846156
  episode_reward_min: -0.46000000000000024
  episodes_this_iter: 208
  episodes_total: 82557
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.142909423917173
          entropy_coeff: 0.01
          kl: 0.015047463344105045
          policy_loss: -0.07454034387638131
          total_loss: 0.05519834866141339
          vf_explained_var: 0.9472811222076416
          vf_loss: 0.11688778287624514
    num_agent_steps_sampled: 4168242
    num_agent_steps_trained: 4168242
    num_steps_sampled: 4168242
    num_steps_trained: 4168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,477,237637,4168242,4.6651,17.69,-0.46,48.0769




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4178238
  custom_metrics: {}
  date: 2021-11-21_12-58-56
  done: false
  episode_len_mean: 48.03365384615385
  episode_media: {}
  episode_reward_max: 15.660000000000004
  episode_reward_mean: 4.899951923076927
  episode_reward_min: -0.47000000000000025
  episodes_this_iter: 208
  episodes_total: 82765
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.154285291208321
          entropy_coeff: 0.01
          kl: 0.014802065238830956
          policy_loss: -0.07643829377864962
          total_loss: 0.05046304718061764
          vf_explained_var: 0.9605068564414978
          vf_loss: 0.11472323830220774
    num_agent_steps_sampled: 4178238
    num_agent_steps_trained: 4178238
    num_steps_sampled: 4178238
    num_steps_trained: 417

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,478,238218,4178238,4.89995,15.66,-0.47,48.0337




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4188234
  custom_metrics: {}
  date: 2021-11-21_13-08-07
  done: false
  episode_len_mean: 48.872549019607845
  episode_media: {}
  episode_reward_max: 13.710000000000004
  episode_reward_mean: 4.636911764705886
  episode_reward_min: -0.47000000000000025
  episodes_this_iter: 204
  episodes_total: 82969
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1421089600367718
          entropy_coeff: 0.01
          kl: 0.015677711596613044
          policy_loss: -0.07468274892454506
          total_loss: 0.06493452797815161
          vf_explained_var: 0.9452487826347351
          vf_loss: 0.1253225787805513
    num_agent_steps_sampled: 4188234
    num_agent_steps_trained: 4188234
    num_steps_sampled: 4188234
    num_steps_trained: 41

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,479,238769,4188234,4.63691,13.71,-0.47,48.8725




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4198230
  custom_metrics: {}
  date: 2021-11-21_13-17-19
  done: false
  episode_len_mean: 48.111111111111114
  episode_media: {}
  episode_reward_max: 15.560000000000008
  episode_reward_mean: 4.84618357487923
  episode_reward_min: -0.47000000000000025
  episodes_this_iter: 207
  episodes_total: 83176
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.158820813654896
          entropy_coeff: 0.01
          kl: 0.015188976877190254
          policy_loss: -0.07321184910345266
          total_loss: 0.07478334994422883
          vf_explained_var: 0.9494154453277588
          vf_loss: 0.1349810186363981
    num_agent_steps_sampled: 4198230
    num_agent_steps_trained: 4198230
    num_steps_sampled: 4198230
    num_steps_trained: 4198

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,480,239321,4198230,4.84618,15.56,-0.47,48.1111


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4208226
  custom_metrics: {}
  date: 2021-11-21_13-26-19
  done: false
  episode_len_mean: 48.3768115942029
  episode_media: {}
  episode_reward_max: 17.67
  episode_reward_mean: 4.931449275362322
  episode_reward_min: -0.4400000000000002
  episodes_this_iter: 207
  episodes_total: 83383
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1384525899666857
          entropy_coeff: 0.01
          kl: 0.015234376521683518
          policy_loss: -0.07607471108936609
          total_loss: 0.07084932545821135
          vf_explained_var: 0.9508056640625
          vf_loss: 0.13360274693289076
    num_agent_steps_sampled: 4208226
    num_agent_steps_trained: 4208226
    num_steps_sampled: 4208226
    num_steps_trained: 4208226
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,481,239860,4208226,4.93145,17.67,-0.44,48.3768




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4218222
  custom_metrics: {}
  date: 2021-11-21_13-35-27
  done: false
  episode_len_mean: 49.633663366336634
  episode_media: {}
  episode_reward_max: 15.600000000000007
  episode_reward_mean: 5.169207920792083
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 202
  episodes_total: 83585
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.134675973175041
          entropy_coeff: 0.01
          kl: 0.01503009979170253
          policy_loss: -0.07780666926754196
          total_loss: 0.058945189755102906
          vf_explained_var: 0.9541347026824951
          vf_loss: 0.12385817063966743
    num_agent_steps_sampled: 4218222
    num_agent_steps_trained: 4218222
    num_steps_sampled: 4218222
    num_steps_trained: 421

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,482,240408,4218222,5.16921,15.6,-0.51,49.6337




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4228218
  custom_metrics: {}
  date: 2021-11-21_13-44-52
  done: false
  episode_len_mean: 47.8421052631579
  episode_media: {}
  episode_reward_max: 13.690000000000005
  episode_reward_mean: 5.100526315789477
  episode_reward_min: -0.46000000000000024
  episodes_this_iter: 209
  episodes_total: 83794
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.134994918945803
          entropy_coeff: 0.01
          kl: 0.015018455119744572
          policy_loss: -0.07541470876532116
          total_loss: 0.06508973770620455
          vf_explained_var: 0.9484106302261353
          vf_loss: 0.1276404766117159
    num_agent_steps_sampled: 4228218
    num_agent_steps_trained: 4228218
    num_steps_sampled: 4228218
    num_steps_trained: 42282

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,483,240974,4228218,5.10053,13.69,-0.46,47.8421


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4238214
  custom_metrics: {}
  date: 2021-11-21_13-53-51
  done: false
  episode_len_mean: 48.6747572815534
  episode_media: {}
  episode_reward_max: 15.590000000000007
  episode_reward_mean: 4.3716990291262166
  episode_reward_min: -0.4800000000000002
  episodes_this_iter: 206
  episodes_total: 84000
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1747472422429355
          entropy_coeff: 0.01
          kl: 0.014237223596526552
          policy_loss: -0.07723927749556968
          total_loss: 0.041634050546666176
          vf_explained_var: 0.9412339925765991
          vf_loss: 0.10818662428863751
    num_agent_steps_sampled: 4238214
    num_agent_steps_trained: 4238214
    num_steps_sampled: 4238214
    num_steps_trained: 42

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,484,241513,4238214,4.3717,15.59,-0.48,48.6748




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4248210
  custom_metrics: {}
  date: 2021-11-21_14-03-03
  done: false
  episode_len_mean: 48.398058252427184
  episode_media: {}
  episode_reward_max: 13.640000000000006
  episode_reward_mean: 5.1411650485436935
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 206
  episodes_total: 84206
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.132552768188308
          entropy_coeff: 0.01
          kl: 0.01534516251389677
          policy_loss: -0.07480397236677346
          total_loss: 0.06784613007851341
          vf_explained_var: 0.9574026465415955
          vf_loss: 0.12901743020634185
    num_agent_steps_sampled: 4248210
    num_agent_steps_trained: 4248210
    num_steps_sampled: 4248210
    num_steps_trained: 424

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,485,242065,4248210,5.14117,13.64,-0.51,48.3981




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4258206
  custom_metrics: {}
  date: 2021-11-21_14-12-16
  done: false
  episode_len_mean: 48.57560975609756
  episode_media: {}
  episode_reward_max: 13.690000000000005
  episode_reward_mean: 4.462682926829271
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 205
  episodes_total: 84411
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.143595189574253
          entropy_coeff: 0.01
          kl: 0.014866112535327096
          policy_loss: -0.07504381659615669
          total_loss: 0.054627478702605
          vf_explained_var: 0.9505674839019775
          vf_loss: 0.11724038301709007
    num_agent_steps_sampled: 4258206
    num_agent_steps_trained: 4258206
    num_steps_sampled: 4258206
    num_steps_trained: 425820

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,486,242617,4258206,4.46268,13.69,-0.5,48.5756




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4268202
  custom_metrics: {}
  date: 2021-11-21_14-21-28
  done: false
  episode_len_mean: 47.98086124401914
  episode_media: {}
  episode_reward_max: 13.680000000000005
  episode_reward_mean: 4.56813397129187
  episode_reward_min: -0.47000000000000025
  episodes_this_iter: 209
  episodes_total: 84620
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1575727542241414
          entropy_coeff: 0.01
          kl: 0.014835797123919258
          policy_loss: -0.07600733107910472
          total_loss: 0.04937331079609468
          vf_explained_var: 0.9574404358863831
          vf_loss: 0.11315856756844435
    num_agent_steps_sampled: 4268202
    num_agent_steps_trained: 4268202
    num_steps_sampled: 4268202
    num_steps_trained: 426

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,487,243170,4268202,4.56813,13.68,-0.47,47.9809




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4278198
  custom_metrics: {}
  date: 2021-11-21_14-30-39
  done: false
  episode_len_mean: 48.80975609756098
  episode_media: {}
  episode_reward_max: 13.680000000000005
  episode_reward_mean: 4.889804878048785
  episode_reward_min: -0.4400000000000002
  episodes_this_iter: 205
  episodes_total: 84825
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1460834879951785
          entropy_coeff: 0.01
          kl: 0.014349111875437732
          policy_loss: -0.07618936496991333
          total_loss: 0.0517690355781967
          vf_explained_var: 0.945921003818512
          vf_loss: 0.11673016464739784
    num_agent_steps_sampled: 4278198
    num_agent_steps_trained: 4278198
    num_steps_sampled: 4278198
    num_steps_trained: 42781

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,488,243720,4278198,4.8898,13.68,-0.44,48.8098




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4288194
  custom_metrics: {}
  date: 2021-11-21_14-39-53
  done: false
  episode_len_mean: 47.856459330143544
  episode_media: {}
  episode_reward_max: 15.610000000000007
  episode_reward_mean: 5.309712918660291
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 209
  episodes_total: 85034
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1261339751113373
          entropy_coeff: 0.01
          kl: 0.015362579390397544
          policy_loss: -0.07232370864364483
          total_loss: 0.07138759079988884
          vf_explained_var: 0.9506874084472656
          vf_loss: 0.12997476185151155
    num_agent_steps_sampled: 4288194
    num_agent_steps_trained: 4288194
    num_steps_sampled: 4288194
    num_steps_trained: 42

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,489,244274,4288194,5.30971,15.61,-0.52,47.8565




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4298190
  custom_metrics: {}
  date: 2021-11-21_14-49-21
  done: false
  episode_len_mean: 47.67788461538461
  episode_media: {}
  episode_reward_max: 13.690000000000005
  episode_reward_mean: 4.873221153846158
  episode_reward_min: -0.47000000000000025
  episodes_this_iter: 208
  episodes_total: 85242
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.118584254635386
          entropy_coeff: 0.01
          kl: 0.015520775744521329
          policy_loss: -0.0752549256776074
          total_loss: 0.06987416778087735
          vf_explained_var: 0.936764657497406
          vf_loss: 0.13095666773123554
    num_agent_steps_sampled: 4298190
    num_agent_steps_trained: 4298190
    num_steps_sampled: 4298190
    num_steps_trained: 42981

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,490,244842,4298190,4.87322,13.69,-0.47,47.6779


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4308186
  custom_metrics: {}
  date: 2021-11-21_14-58-21
  done: false
  episode_len_mean: 47.904761904761905
  episode_media: {}
  episode_reward_max: 13.640000000000006
  episode_reward_mean: 4.765000000000004
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 210
  episodes_total: 85452
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1020290979659224
          entropy_coeff: 0.01
          kl: 0.015424343159486492
          policy_loss: -0.07508050428693376
          total_loss: 0.06916845860505344
          vf_explained_var: 0.95048987865448
          vf_loss: 0.1301306704739969
    num_agent_steps_sampled: 4308186
    num_agent_steps_trained: 4308186
    num_steps_sampled: 4308186
    num_steps_trained: 4308

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,491,245382,4308186,4.765,13.64,-0.49,47.9048




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4318182
  custom_metrics: {}
  date: 2021-11-21_15-07-33
  done: false
  episode_len_mean: 48.616504854368934
  episode_media: {}
  episode_reward_max: 13.710000000000004
  episode_reward_mean: 5.0361165048543715
  episode_reward_min: -0.46000000000000024
  episodes_this_iter: 206
  episodes_total: 85658
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.114856803058142
          entropy_coeff: 0.01
          kl: 0.01581596640867787
          policy_loss: -0.06781254022738224
          total_loss: 0.08433496075578667
          vf_explained_var: 0.9476137161254883
          vf_loss: 0.13726531896201602
    num_agent_steps_sampled: 4318182
    num_agent_steps_trained: 4318182
    num_steps_sampled: 4318182
    num_steps_trained: 43

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,492,245934,4318182,5.03612,13.71,-0.46,48.6165




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4328178
  custom_metrics: {}
  date: 2021-11-21_15-16-47
  done: false
  episode_len_mean: 48.5219512195122
  episode_media: {}
  episode_reward_max: 15.550000000000008
  episode_reward_mean: 4.729512195121955
  episode_reward_min: -0.45000000000000023
  episodes_this_iter: 205
  episodes_total: 85863
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.12505681294514
          entropy_coeff: 0.01
          kl: 0.015093008807079254
          policy_loss: -0.07555754783876094
          total_loss: 0.06204811115415772
          vf_explained_var: 0.9543329477310181
          vf_loss: 0.12447246487708725
    num_agent_steps_sampled: 4328178
    num_agent_steps_trained: 4328178
    num_steps_sampled: 4328178
    num_steps_trained: 43281

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,493,246488,4328178,4.72951,15.55,-0.45,48.522




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4338174
  custom_metrics: {}
  date: 2021-11-21_15-26-00
  done: false
  episode_len_mean: 49.181372549019606
  episode_media: {}
  episode_reward_max: 15.660000000000005
  episode_reward_mean: 4.875784313725494
  episode_reward_min: -0.45000000000000023
  episodes_this_iter: 204
  episodes_total: 86067
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.114587903046704
          entropy_coeff: 0.01
          kl: 0.015236000136068809
          policy_loss: -0.07577485638473325
          total_loss: 0.06345924429666944
          vf_explained_var: 0.9524962902069092
          vf_loss: 0.1256704658222194
    num_agent_steps_sampled: 4338174
    num_agent_steps_trained: 4338174
    num_steps_sampled: 4338174
    num_steps_trained: 433

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,494,247041,4338174,4.87578,15.66,-0.45,49.1814




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4348170
  custom_metrics: {}
  date: 2021-11-21_15-35-15
  done: false
  episode_len_mean: 49.25615763546798
  episode_media: {}
  episode_reward_max: 15.710000000000004
  episode_reward_mean: 5.2064039408867036
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 203
  episodes_total: 86270
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.085638584358146
          entropy_coeff: 0.01
          kl: 0.015799587280374273
          policy_loss: -0.07126488119099512
          total_loss: 0.09659617165202088
          vf_explained_var: 0.9369630813598633
          vf_loss: 0.152724002960772
    num_agent_steps_sampled: 4348170
    num_agent_steps_trained: 4348170
    num_steps_sampled: 4348170
    num_steps_trained: 4348

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,495,247596,4348170,5.2064,15.71,-0.49,49.2562


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4358166
  custom_metrics: {}
  date: 2021-11-21_15-44-21
  done: false
  episode_len_mean: 49.73134328358209
  episode_media: {}
  episode_reward_max: 15.690000000000005
  episode_reward_mean: 4.810447761194034
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 201
  episodes_total: 86471
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1182275162403843
          entropy_coeff: 0.01
          kl: 0.01519677771253406
          policy_loss: -0.07291868612386072
          total_loss: 0.0715923981352143
          vf_explained_var: 0.9472105503082275
          vf_loss: 0.13107319840054255
    num_agent_steps_sampled: 4358166
    num_agent_steps_trained: 4358166
    num_steps_sampled: 4358166
    num_steps_trained: 4358

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,496,248143,4358166,4.81045,15.69,-0.49,49.7313




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4368162
  custom_metrics: {}
  date: 2021-11-21_15-53-41
  done: false
  episode_len_mean: 48.86764705882353
  episode_media: {}
  episode_reward_max: 15.680000000000005
  episode_reward_mean: 5.103431372549023
  episode_reward_min: -0.4400000000000002
  episodes_this_iter: 204
  episodes_total: 86675
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1106245870092306
          entropy_coeff: 0.01
          kl: 0.015439381977495881
          policy_loss: -0.07521024293012792
          total_loss: 0.0667867700238629
          vf_explained_var: 0.9523615837097168
          vf_loss: 0.12793041513035874
    num_agent_steps_sampled: 4368162
    num_agent_steps_trained: 4368162
    num_steps_sampled: 4368162
    num_steps_trained: 4368

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,497,248703,4368162,5.10343,15.68,-0.44,48.8676




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4378158
  custom_metrics: {}
  date: 2021-11-21_16-03-00
  done: false
  episode_len_mean: 49.935
  episode_media: {}
  episode_reward_max: 13.640000000000006
  episode_reward_mean: 4.668750000000004
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 200
  episodes_total: 86875
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0948841613938054
          entropy_coeff: 0.01
          kl: 0.014765705167666022
          policy_loss: -0.07231884447912205
          total_loss: 0.06871197767223021
          vf_explained_var: 0.9548239707946777
          vf_loss: 0.12834154023755207
    num_agent_steps_sampled: 4378158
    num_agent_steps_trained: 4378158
    num_steps_sampled: 4378158
    num_steps_trained: 4378158
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,498,249261,4378158,4.66875,13.64,-0.51,49.935




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4388154
  custom_metrics: {}
  date: 2021-11-21_16-12-26
  done: false
  episode_len_mean: 50.97969543147208
  episode_media: {}
  episode_reward_max: 15.690000000000005
  episode_reward_mean: 4.864619289340106
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 197
  episodes_total: 87072
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1032592241543844
          entropy_coeff: 0.01
          kl: 0.015398482989090252
          policy_loss: -0.07488687852681644
          total_loss: 0.0720999665656801
          vf_explained_var: 0.9379308223724365
          vf_loss: 0.13293976789821
    num_agent_steps_sampled: 4388154
    num_agent_steps_trained: 4388154
    num_steps_sampled: 4388154
    num_steps_trained: 438815

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,499,249827,4388154,4.86462,15.69,-0.48,50.9797




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4398150
  custom_metrics: {}
  date: 2021-11-21_16-21-44
  done: false
  episode_len_mean: 50.776649746192895
  episode_media: {}
  episode_reward_max: 15.660000000000005
  episode_reward_mean: 5.281472081218279
  episode_reward_min: -0.43000000000000016
  episodes_this_iter: 197
  episodes_total: 87269
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.070782761952006
          entropy_coeff: 0.01
          kl: 0.015758530698497145
          policy_loss: -0.07051582570082622
          total_loss: 0.08126320113119162
          vf_explained_var: 0.9586511850357056
          vf_loss: 0.13658695175540614
    num_agent_steps_sampled: 4398150
    num_agent_steps_trained: 4398150
    num_steps_sampled: 4398150
    num_steps_trained: 43

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,500,250385,4398150,5.28147,15.66,-0.43,50.7766


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4408146
  custom_metrics: {}
  date: 2021-11-21_16-30-49
  done: false
  episode_len_mean: 50.93877551020408
  episode_media: {}
  episode_reward_max: 15.670000000000005
  episode_reward_mean: 5.115765306122452
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 196
  episodes_total: 87465
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0824343729210666
          entropy_coeff: 0.01
          kl: 0.016011372019379327
          policy_loss: -0.07089523468944445
          total_loss: 0.08835470336522581
          vf_explained_var: 0.9223991632461548
          vf_loss: 0.1435983740516104
    num_agent_steps_sampled: 4408146
    num_agent_steps_trained: 4408146
    num_steps_sampled: 4408146
    num_steps_trained: 4408

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,501,250930,4408146,5.11577,15.67,-0.53,50.9388




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4418142
  custom_metrics: {}
  date: 2021-11-21_16-40-03
  done: false
  episode_len_mean: 51.46907216494845
  episode_media: {}
  episode_reward_max: 13.720000000000002
  episode_reward_mean: 4.8967525773195915
  episode_reward_min: -0.47000000000000025
  episodes_this_iter: 194
  episodes_total: 87659
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.106802829082711
          entropy_coeff: 0.01
          kl: 0.015751309439563172
          policy_loss: -0.07260546151954142
          total_loss: 0.07779822730393146
          vf_explained_var: 0.9319349527359009
          vf_loss: 0.1355882646467342
    num_agent_steps_sampled: 4418142
    num_agent_steps_trained: 4418142
    num_steps_sampled: 4418142
    num_steps_trained: 441

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,502,251484,4418142,4.89675,13.72,-0.47,51.4691




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4428138
  custom_metrics: {}
  date: 2021-11-21_16-49-31
  done: false
  episode_len_mean: 51.927083333333336
  episode_media: {}
  episode_reward_max: 13.670000000000005
  episode_reward_mean: 4.72354166666667
  episode_reward_min: -0.4400000000000002
  episodes_this_iter: 192
  episodes_total: 87851
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.110912274093513
          entropy_coeff: 0.01
          kl: 0.015759944435928347
          policy_loss: -0.07367085517290474
          total_loss: 0.07268986697020019
          vf_explained_var: 0.9429516792297363
          vf_loss: 0.13156672036667993
    num_agent_steps_sampled: 4428138
    num_agent_steps_trained: 4428138
    num_steps_sampled: 4428138
    num_steps_trained: 4428

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,503,252052,4428138,4.72354,13.67,-0.44,51.9271




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4438134
  custom_metrics: {}
  date: 2021-11-21_16-58-42
  done: false
  episode_len_mean: 52.213541666666664
  episode_media: {}
  episode_reward_max: 13.700000000000005
  episode_reward_mean: 4.949583333333337
  episode_reward_min: -0.47000000000000025
  episodes_this_iter: 192
  episodes_total: 88043
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0628356959685745
          entropy_coeff: 0.01
          kl: 0.014435238413113527
          policy_loss: -0.07654797177624806
          total_loss: 0.057457975503582046
          vf_explained_var: 0.951920747756958
          vf_loss: 0.12174902582629005
    num_agent_steps_sampled: 4438134
    num_agent_steps_trained: 4438134
    num_steps_sampled: 4438134
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,504,252603,4438134,4.94958,13.7,-0.47,52.2135


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4448130
  custom_metrics: {}
  date: 2021-11-21_17-07-33
  done: false
  episode_len_mean: 53.224598930481285
  episode_media: {}
  episode_reward_max: 15.600000000000009
  episode_reward_mean: 5.072352941176475
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 187
  episodes_total: 88230
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.073093560613303
          entropy_coeff: 0.01
          kl: 0.01510933373587989
          policy_loss: -0.07255625660454898
          total_loss: 0.07515877701445284
          vf_explained_var: 0.9431435465812683
          vf_loss: 0.13402501693337662
    num_agent_steps_sampled: 4448130
    num_agent_steps_trained: 4448130
    num_steps_sampled: 4448130
    num_steps_trained: 4448

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,505,253134,4448130,5.07235,15.6,-0.5,53.2246




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4458126
  custom_metrics: {}
  date: 2021-11-21_17-16-38
  done: false
  episode_len_mean: 52.12435233160622
  episode_media: {}
  episode_reward_max: 13.610000000000007
  episode_reward_mean: 4.869067357512957
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 193
  episodes_total: 88423
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0713444990565977
          entropy_coeff: 0.01
          kl: 0.014603594864613451
          policy_loss: -0.06806703730040338
          total_loss: 0.06669262918403693
          vf_explained_var: 0.9464341998100281
          vf_loss: 0.12220429553244502
    num_agent_steps_sampled: 4458126
    num_agent_steps_trained: 4458126
    num_steps_sampled: 4458126
    num_steps_trained: 44

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,506,253679,4458126,4.86907,13.61,-0.49,52.1244




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4468122
  custom_metrics: {}
  date: 2021-11-21_17-25-46
  done: false
  episode_len_mean: 51.885416666666664
  episode_media: {}
  episode_reward_max: 21.66999999999998
  episode_reward_mean: 5.554375000000004
  episode_reward_min: -0.4100000000000002
  episodes_this_iter: 192
  episodes_total: 88615
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.069133011428228
          entropy_coeff: 0.01
          kl: 0.01589231215332938
          policy_loss: -0.0646299991870317
          total_loss: 0.09919766967545163
          vf_explained_var: 0.9468445777893066
          vf_loss: 0.1483143240984821
    num_agent_steps_sampled: 4468122
    num_agent_steps_trained: 4468122
    num_steps_sampled: 4468122
    num_steps_trained: 4468122

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,507,254226,4468122,5.55438,21.67,-0.41,51.8854


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4478118
  custom_metrics: {}
  date: 2021-11-21_17-34-38
  done: false
  episode_len_mean: 52.142105263157895
  episode_media: {}
  episode_reward_max: 15.690000000000005
  episode_reward_mean: 5.236000000000003
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 190
  episodes_total: 88805
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.067214713254607
          entropy_coeff: 0.01
          kl: 0.015711770980552493
          policy_loss: -0.07185263042428996
          total_loss: 0.08188072477778807
          vf_explained_var: 0.9454730749130249
          vf_loss: 0.13861212244642576
    num_agent_steps_sampled: 4478118
    num_agent_steps_trained: 4478118
    num_steps_sampled: 4478118
    num_steps_trained: 447

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,508,254759,4478118,5.236,15.69,-0.54,52.1421


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4488114
  custom_metrics: {}
  date: 2021-11-21_17-43-31
  done: false
  episode_len_mean: 52.05181347150259
  episode_media: {}
  episode_reward_max: 15.600000000000007
  episode_reward_mean: 5.167202072538864
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 193
  episodes_total: 88998
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0800717665967214
          entropy_coeff: 0.01
          kl: 0.015255520221640373
          policy_loss: -0.0689258933031088
          total_loss: 0.07938454180775185
          vf_explained_var: 0.9501486420631409
          vf_loss: 0.13435716988429067
    num_agent_steps_sampled: 4488114
    num_agent_steps_trained: 4488114
    num_steps_sampled: 4488114
    num_steps_trained: 4488

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,509,255292,4488114,5.1672,15.6,-0.51,52.0518




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4498110
  custom_metrics: {}
  date: 2021-11-21_17-52-50
  done: false
  episode_len_mean: 52.083333333333336
  episode_media: {}
  episode_reward_max: 15.690000000000005
  episode_reward_mean: 4.894739583333338
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 192
  episodes_total: 89190
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.070629341128361
          entropy_coeff: 0.01
          kl: 0.014628445897997359
          policy_loss: -0.07471450375634674
          total_loss: 0.07499932911823307
          vf_explained_var: 0.9525302648544312
          vf_loss: 0.13709469735987082
    num_agent_steps_sampled: 4498110
    num_agent_steps_trained: 4498110
    num_steps_sampled: 4498110
    num_steps_trained: 449

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,510,255851,4498110,4.89474,15.69,-0.52,52.0833




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4508106
  custom_metrics: {}
  date: 2021-11-21_18-01-53
  done: false
  episode_len_mean: 52.166666666666664
  episode_media: {}
  episode_reward_max: 15.540000000000006
  episode_reward_mean: 5.646458333333338
  episode_reward_min: -0.4400000000000002
  episodes_this_iter: 192
  episodes_total: 89382
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.05083752056681
          entropy_coeff: 0.01
          kl: 0.015468998038500046
          policy_loss: -0.07017125693269137
          total_loss: 0.08779499149384785
          vf_explained_var: 0.9496524930000305
          vf_loss: 0.1432343109822215
    num_agent_steps_sampled: 4508106
    num_agent_steps_trained: 4508106
    num_steps_sampled: 4508106
    num_steps_trained: 45081

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,511,256394,4508106,5.64646,15.54,-0.44,52.1667


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4518102
  custom_metrics: {}
  date: 2021-11-21_18-10-59
  done: false
  episode_len_mean: 52.225130890052355
  episode_media: {}
  episode_reward_max: 15.670000000000005
  episode_reward_mean: 5.0922513089005275
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 191
  episodes_total: 89573
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.063202642867843
          entropy_coeff: 0.01
          kl: 0.015040587263254445
          policy_loss: -0.07114556427593918
          total_loss: 0.07667346331938432
          vf_explained_var: 0.9440317153930664
          vf_loss: 0.13418671507743676
    num_agent_steps_sampled: 4518102
    num_agent_steps_trained: 4518102
    num_steps_sampled: 4518102
    num_steps_trained: 45

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,512,256939,4518102,5.09225,15.67,-0.51,52.2251




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4528098
  custom_metrics: {}
  date: 2021-11-21_18-20-38
  done: false
  episode_len_mean: 51.43298969072165
  episode_media: {}
  episode_reward_max: 15.550000000000008
  episode_reward_mean: 5.327164948453612
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 194
  episodes_total: 89767
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.064634620377338
          entropy_coeff: 0.01
          kl: 0.01565944539918317
          policy_loss: -0.06932042748180701
          total_loss: 0.08477932982192511
          vf_explained_var: 0.9329443573951721
          vf_loss: 0.13907192859604176
    num_agent_steps_sampled: 4528098
    num_agent_steps_trained: 4528098
    num_steps_sampled: 4528098
    num_steps_trained: 45280

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,513,257519,4528098,5.32716,15.55,-0.54,51.433




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4538094
  custom_metrics: {}
  date: 2021-11-21_18-30-14
  done: false
  episode_len_mean: 50.676767676767675
  episode_media: {}
  episode_reward_max: 15.610000000000007
  episode_reward_mean: 4.856818181818186
  episode_reward_min: -0.4300000000000002
  episodes_this_iter: 198
  episodes_total: 89965
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.093088465833281
          entropy_coeff: 0.01
          kl: 0.014691287988430734
          policy_loss: -0.0728336851272864
          total_loss: 0.061084307222077465
          vf_explained_var: 0.9478856921195984
          vf_loss: 0.12138028449190025
    num_agent_steps_sampled: 4538094
    num_agent_steps_trained: 4538094
    num_steps_sampled: 4538094
    num_steps_trained: 453

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,514,258095,4538094,4.85682,15.61,-0.43,50.6768




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4548090
  custom_metrics: {}
  date: 2021-11-21_18-39-38
  done: false
  episode_len_mean: 52.37696335078534
  episode_media: {}
  episode_reward_max: 15.610000000000007
  episode_reward_mean: 4.94361256544503
  episode_reward_min: -0.47000000000000025
  episodes_this_iter: 191
  episodes_total: 90156
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.094090547619096
          entropy_coeff: 0.01
          kl: 0.014930951410258322
          policy_loss: -0.07731967262333087
          total_loss: 0.06012911529071726
          vf_explained_var: 0.9185881018638611
          vf_loss: 0.12437511852453369
    num_agent_steps_sampled: 4548090
    num_agent_steps_trained: 4548090
    num_steps_sampled: 4548090
    num_steps_trained: 4548

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,515,258659,4548090,4.94361,15.61,-0.47,52.377


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4558086
  custom_metrics: {}
  date: 2021-11-21_18-48-39
  done: false
  episode_len_mean: 52.17277486910995
  episode_media: {}
  episode_reward_max: 15.730000000000006
  episode_reward_mean: 5.4538219895288
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 191
  episodes_total: 90347
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0521024191475297
          entropy_coeff: 0.01
          kl: 0.01480108785230703
          policy_loss: -0.07271241767150194
          total_loss: 0.07780562274092372
          vf_explained_var: 0.9515458345413208
          vf_loss: 0.13732033453359976
    num_agent_steps_sampled: 4558086
    num_agent_steps_trained: 4558086
    num_steps_sampled: 4558086
    num_steps_trained: 455808

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,516,259199,4558086,5.45382,15.73,-0.54,52.1728




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4568082
  custom_metrics: {}
  date: 2021-11-21_18-57-55
  done: false
  episode_len_mean: 52.15625
  episode_media: {}
  episode_reward_max: 15.620000000000006
  episode_reward_mean: 4.589687500000004
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 192
  episodes_total: 90539
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0897003079513947
          entropy_coeff: 0.01
          kl: 0.014431862372386752
          policy_loss: -0.07600744958027612
          total_loss: 0.052176402409704274
          vf_explained_var: 0.9455350637435913
          vf_loss: 0.11620326781052587
    num_agent_steps_sampled: 4568082
    num_agent_steps_trained: 4568082
    num_steps_sampled: 4568082
    num_steps_trained: 4568082
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,517,259755,4568082,4.58969,15.62,-0.48,52.1562




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4578078
  custom_metrics: {}
  date: 2021-11-21_19-07-17
  done: false
  episode_len_mean: 52.44502617801047
  episode_media: {}
  episode_reward_max: 17.61
  episode_reward_mean: 5.758062827225134
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 191
  episodes_total: 90730
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0332104680528604
          entropy_coeff: 0.01
          kl: 0.015693820034390692
          policy_loss: -0.07291060110798414
          total_loss: 0.08240090547854828
          vf_explained_var: 0.9541924595832825
          vf_loss: 0.13989112638320444
    num_agent_steps_sampled: 4578078
    num_agent_steps_trained: 4578078
    num_steps_sampled: 4578078
    num_steps_trained: 4578078
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,518,260317,4578078,5.75806,17.61,-0.51,52.445




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4588074
  custom_metrics: {}
  date: 2021-11-21_19-16-26
  done: false
  episode_len_mean: 52.57068062827225
  episode_media: {}
  episode_reward_max: 15.580000000000007
  episode_reward_mean: 5.025445026178015
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 191
  episodes_total: 90921
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0671349882600776
          entropy_coeff: 0.01
          kl: 0.015544903496238247
          policy_loss: -0.07171429885381503
          total_loss: 0.08487473695198107
          vf_explained_var: 0.9456016421318054
          vf_loss: 0.1418471517569636
    num_agent_steps_sampled: 4588074
    num_agent_steps_trained: 4588074
    num_steps_sampled: 4588074
    num_steps_trained: 458

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,519,260867,4588074,5.02545,15.58,-0.49,52.5707




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4598070
  custom_metrics: {}
  date: 2021-11-21_19-25-45
  done: false
  episode_len_mean: 52.98930481283423
  episode_media: {}
  episode_reward_max: 17.580000000000005
  episode_reward_mean: 5.178823529411769
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 187
  episodes_total: 91108
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.065710394521315
          entropy_coeff: 0.01
          kl: 0.015503798500245122
          policy_loss: -0.07433665858505806
          total_loss: 0.06864787584150894
          vf_explained_var: 0.9503821730613708
          vf_loss: 0.1283220462991957
    num_agent_steps_sampled: 4598070
    num_agent_steps_trained: 4598070
    num_steps_sampled: 4598070
    num_steps_trained: 45980

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,520,261425,4598070,5.17882,17.58,-0.51,52.9893




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4608066
  custom_metrics: {}
  date: 2021-11-21_19-35-05
  done: false
  episode_len_mean: 52.57068062827225
  episode_media: {}
  episode_reward_max: 17.58999999999997
  episode_reward_mean: 5.137801047120424
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 191
  episodes_total: 91299
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0535942094153667
          entropy_coeff: 0.01
          kl: 0.015195802619638906
          policy_loss: -0.07314230538421064
          total_loss: 0.07443387151327248
          vf_explained_var: 0.9275346398353577
          vf_loss: 0.13349418033598867
    num_agent_steps_sampled: 4608066
    num_agent_steps_trained: 4608066
    num_steps_sampled: 4608066
    num_steps_trained: 460

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,521,261986,4608066,5.1378,17.59,-0.48,52.5707


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4618062
  custom_metrics: {}
  date: 2021-11-21_19-43-56
  done: false
  episode_len_mean: 54.15217391304348
  episode_media: {}
  episode_reward_max: 13.670000000000005
  episode_reward_mean: 5.073043478260875
  episode_reward_min: -0.5600000000000003
  episodes_this_iter: 184
  episodes_total: 91483
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0557689106368637
          entropy_coeff: 0.01
          kl: 0.014816684652896546
          policy_loss: -0.07582888865485052
          total_loss: 0.06371149479792113
          vf_explained_var: 0.9408073425292969
          vf_loss: 0.12634381194024843
    num_agent_steps_sampled: 4618062
    num_agent_steps_trained: 4618062
    num_steps_sampled: 4618062
    num_steps_trained: 461

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,522,262517,4618062,5.07304,13.67,-0.56,54.1522




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4628058
  custom_metrics: {}
  date: 2021-11-21_19-53-05
  done: false
  episode_len_mean: 53.33510638297872
  episode_media: {}
  episode_reward_max: 19.63999999999999
  episode_reward_mean: 4.880212765957451
  episode_reward_min: -0.6200000000000003
  episodes_this_iter: 188
  episodes_total: 91671
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0459156219739034
          entropy_coeff: 0.01
          kl: 0.015396405244616069
          policy_loss: -0.07137805025193952
          total_loss: 0.06557153086431056
          vf_explained_var: 0.9457404017448425
          vf_loss: 0.1223337996377045
    num_agent_steps_sampled: 4628058
    num_agent_steps_trained: 4628058
    num_steps_sampled: 4628058
    num_steps_trained: 46280

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,523,263065,4628058,4.88021,19.64,-0.62,53.3351




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4638054
  custom_metrics: {}
  date: 2021-11-21_20-02-56
  done: false
  episode_len_mean: 53.50802139037433
  episode_media: {}
  episode_reward_max: 15.640000000000006
  episode_reward_mean: 5.385133689839577
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 187
  episodes_total: 91858
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0373066291033504
          entropy_coeff: 0.01
          kl: 0.016080904105271146
          policy_loss: -0.07050866002015488
          total_loss: 0.08609627782840165
          vf_explained_var: 0.9430322051048279
          vf_loss: 0.14034369367298233
    num_agent_steps_sampled: 4638054
    num_agent_steps_trained: 4638054
    num_steps_sampled: 4638054
    num_steps_trained: 463

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,524,263656,4638054,5.38513,15.64,-0.53,53.508




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4648050
  custom_metrics: {}
  date: 2021-11-21_20-12-05
  done: false
  episode_len_mean: 54.17391304347826
  episode_media: {}
  episode_reward_max: 15.650000000000006
  episode_reward_mean: 5.422826086956526
  episode_reward_min: -0.5900000000000003
  episodes_this_iter: 184
  episodes_total: 92042
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0320108492929774
          entropy_coeff: 0.01
          kl: 0.015835482715298454
          policy_loss: -0.07320550284924442
          total_loss: 0.075625084510455
          vf_explained_var: 0.9469926357269287
          vf_loss: 0.13307548524815604
    num_agent_steps_sampled: 4648050
    num_agent_steps_trained: 4648050
    num_steps_sampled: 4648050
    num_steps_trained: 46480

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,525,264206,4648050,5.42283,15.65,-0.59,54.1739




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4658046
  custom_metrics: {}
  date: 2021-11-21_20-21-21
  done: false
  episode_len_mean: 53.62903225806452
  episode_media: {}
  episode_reward_max: 15.589999999999996
  episode_reward_mean: 5.509731182795703
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 186
  episodes_total: 92228
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.007595822777614
          entropy_coeff: 0.01
          kl: 0.01645021363233512
          policy_loss: -0.06598751909828827
          total_loss: 0.09629917828445991
          vf_explained_var: 0.9436135292053223
          vf_loss: 0.14488701152813482
    num_agent_steps_sampled: 4658046
    num_agent_steps_trained: 4658046
    num_steps_sampled: 4658046
    num_steps_trained: 4658

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,526,264761,4658046,5.50973,15.59,-0.48,53.629


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4668042
  custom_metrics: {}
  date: 2021-11-21_20-30-15
  done: false
  episode_len_mean: 53.854838709677416
  episode_media: {}
  episode_reward_max: 15.700000000000005
  episode_reward_mean: 5.365913978494628
  episode_reward_min: -0.4600000000000002
  episodes_this_iter: 186
  episodes_total: 92414
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0010005437466036
          entropy_coeff: 0.01
          kl: 0.0149260726665556
          policy_loss: -0.0720185830625656
          total_loss: 0.07337007096506583
          vf_explained_var: 0.9539527297019958
          vf_loss: 0.13139519884108927
    num_agent_steps_sampled: 4668042
    num_agent_steps_trained: 4668042
    num_steps_sampled: 4668042
    num_steps_trained: 46680

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,527,265295,4668042,5.36591,15.7,-0.46,53.8548


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4678038
  custom_metrics: {}
  date: 2021-11-21_20-39-05
  done: false
  episode_len_mean: 55.62222222222222
  episode_media: {}
  episode_reward_max: 11.650000000000006
  episode_reward_mean: 5.251111111111117
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 180
  episodes_total: 92594
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.010565014177537
          entropy_coeff: 0.01
          kl: 0.01431201872840609
          policy_loss: -0.07308578608937576
          total_loss: 0.06477756548332048
          vf_explained_var: 0.9520248174667358
          vf_loss: 0.12536443267073707
    num_agent_steps_sampled: 4678038
    num_agent_steps_trained: 4678038
    num_steps_sampled: 4678038
    num_steps_trained: 46780

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,528,265825,4678038,5.25111,11.65,-0.5,55.6222




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4688034
  custom_metrics: {}
  date: 2021-11-21_20-48-09
  done: false
  episode_len_mean: 54.13513513513514
  episode_media: {}
  episode_reward_max: 15.570000000000007
  episode_reward_mean: 5.1536216216216255
  episode_reward_min: -0.44000000000000017
  episodes_this_iter: 185
  episodes_total: 92779
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0111942620162506
          entropy_coeff: 0.01
          kl: 0.015150032826450336
          policy_loss: -0.07156006118810479
          total_loss: 0.07996056400637311
          vf_explained_var: 0.9541788101196289
          vf_loss: 0.13711889816792194
    num_agent_steps_sampled: 4688034
    num_agent_steps_trained: 4688034
    num_steps_sampled: 4688034
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,529,266370,4688034,5.15362,15.57,-0.44,54.1351




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4698030
  custom_metrics: {}
  date: 2021-11-21_20-57-10
  done: false
  episode_len_mean: 54.972375690607734
  episode_media: {}
  episode_reward_max: 15.640000000000006
  episode_reward_mean: 5.226574585635363
  episode_reward_min: -0.7100000000000004
  episodes_this_iter: 181
  episodes_total: 92960
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0365129737250776
          entropy_coeff: 0.01
          kl: 0.014748679155509446
          policy_loss: -0.07521061892412949
          total_loss: 0.05923846684645395
          vf_explained_var: 0.9473373889923096
          vf_loss: 0.12121487952783763
    num_agent_steps_sampled: 4698030
    num_agent_steps_trained: 4698030
    num_steps_sampled: 4698030
    num_steps_trained: 46

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,530,266911,4698030,5.22657,15.64,-0.71,54.9724


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4708026
  custom_metrics: {}
  date: 2021-11-21_21-06-01
  done: false
  episode_len_mean: 54.62841530054645
  episode_media: {}
  episode_reward_max: 15.620000000000006
  episode_reward_mean: 5.116284153005468
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 183
  episodes_total: 93143
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0117522588695387
          entropy_coeff: 0.01
          kl: 0.015002496465921866
          policy_loss: -0.06884072856412833
          total_loss: 0.0758607337557334
          vf_explained_var: 0.9413124918937683
          vf_loss: 0.13064142230180015
    num_agent_steps_sampled: 4708026
    num_agent_steps_trained: 4708026
    num_steps_sampled: 4708026
    num_steps_trained: 470

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,531,267441,4708026,5.11628,15.62,-0.48,54.6284




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4718022
  custom_metrics: {}
  date: 2021-11-21_21-15-05
  done: false
  episode_len_mean: 54.38918918918919
  episode_media: {}
  episode_reward_max: 17.59
  episode_reward_mean: 5.509243243243247
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 185
  episodes_total: 93328
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.031583364541272
          entropy_coeff: 0.01
          kl: 0.016024269218686493
          policy_loss: -0.07105126618547049
          total_loss: 0.08278214596068144
          vf_explained_var: 0.9189639687538147
          vf_loss: 0.13764395605301463
    num_agent_steps_sampled: 4718022
    num_agent_steps_trained: 4718022
    num_steps_sampled: 4718022
    num_steps_trained: 4718022
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,532,267985,4718022,5.50924,17.59,-0.5,54.3892




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4728018
  custom_metrics: {}
  date: 2021-11-21_21-25-01
  done: false
  episode_len_mean: 53.36021505376344
  episode_media: {}
  episode_reward_max: 15.600000000000007
  episode_reward_mean: 5.594731182795703
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 186
  episodes_total: 93514
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0244479758911824
          entropy_coeff: 0.01
          kl: 0.015304435728751144
          policy_loss: -0.07106822202653691
          total_loss: 0.07758730012097935
          vf_explained_var: 0.9519456624984741
          vf_loss: 0.13403458303255758
    num_agent_steps_sampled: 4728018
    num_agent_steps_trained: 4728018
    num_steps_sampled: 4728018
    num_steps_trained: 472

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,533,268581,4728018,5.59473,15.6,-0.52,53.3602




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4738014
  custom_metrics: {}
  date: 2021-11-21_21-34-02
  done: false
  episode_len_mean: 55.209944751381215
  episode_media: {}
  episode_reward_max: 19.689999999999987
  episode_reward_mean: 5.686132596685089
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 181
  episodes_total: 93695
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0113661764855366
          entropy_coeff: 0.01
          kl: 0.015621784861927324
          policy_loss: -0.07241613710273186
          total_loss: 0.08108803230130711
          vf_explained_var: 0.9404204487800598
          vf_loss: 0.13802945093536023
    num_agent_steps_sampled: 4738014
    num_agent_steps_trained: 4738014
    num_steps_sampled: 4738014
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,534,269122,4738014,5.68613,19.69,-0.49,55.2099


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4748010
  custom_metrics: {}
  date: 2021-11-21_21-42-55
  done: false
  episode_len_mean: 54.167567567567566
  episode_media: {}
  episode_reward_max: 13.630000000000006
  episode_reward_mean: 4.89567567567568
  episode_reward_min: -0.4300000000000002
  episodes_this_iter: 185
  episodes_total: 93880
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0519156026074206
          entropy_coeff: 0.01
          kl: 0.014531420717097891
          policy_loss: -0.0731228079526831
          total_loss: 0.054256683993016266
          vf_explained_var: 0.9460033774375916
          vf_loss: 0.11479425480389167
    num_agent_steps_sampled: 4748010
    num_agent_steps_trained: 4748010
    num_steps_sampled: 4748010
    num_steps_trained: 474

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,535,269655,4748010,4.89568,13.63,-0.43,54.1676




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4758006
  custom_metrics: {}
  date: 2021-11-21_21-51-57
  done: false
  episode_len_mean: 55.361111111111114
  episode_media: {}
  episode_reward_max: 15.690000000000003
  episode_reward_mean: 5.375388888888893
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 180
  episodes_total: 94060
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.035837312874545
          entropy_coeff: 0.01
          kl: 0.01598225948388379
          policy_loss: -0.06677236181542878
          total_loss: 0.08743653439107227
          vf_explained_var: 0.9487535953521729
          vf_loss: 0.13815768318658464
    num_agent_steps_sampled: 4758006
    num_agent_steps_trained: 4758006
    num_steps_sampled: 4758006
    num_steps_trained: 475

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,536,270197,4758006,5.37539,15.69,-0.49,55.3611




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4768002
  custom_metrics: {}
  date: 2021-11-21_22-01-00
  done: false
  episode_len_mean: 55.773480662983424
  episode_media: {}
  episode_reward_max: 15.590000000000007
  episode_reward_mean: 5.486464088397795
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 181
  episodes_total: 94241
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0383548186486027
          entropy_coeff: 0.01
          kl: 0.015177771565088797
          policy_loss: -0.07238312415934617
          total_loss: 0.07816716006035052
          vf_explained_var: 0.9425229430198669
          vf_loss: 0.1363569712579572
    num_agent_steps_sampled: 4768002
    num_agent_steps_trained: 4768002
    num_steps_sampled: 4768002
    num_steps_trained: 476

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,537,270740,4768002,5.48646,15.59,-0.53,55.7735




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4777998
  custom_metrics: {}
  date: 2021-11-21_22-10-04
  done: false
  episode_len_mean: 54.80110497237569
  episode_media: {}
  episode_reward_max: 15.650000000000006
  episode_reward_mean: 5.448674033149176
  episode_reward_min: -0.5500000000000003
  episodes_this_iter: 181
  episodes_total: 94422
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.024294603912227
          entropy_coeff: 0.01
          kl: 0.015115498345440358
          policy_loss: -0.06781315335871939
          total_loss: 0.10479751709389526
          vf_explained_var: 0.9220715165138245
          vf_loss: 0.1584186213110652
    num_agent_steps_sampled: 4777998
    num_agent_steps_trained: 4777998
    num_steps_sampled: 4777998
    num_steps_trained: 47779

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,538,271284,4777998,5.44867,15.65,-0.55,54.8011


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4787994
  custom_metrics: {}
  date: 2021-11-21_22-18-55
  done: false
  episode_len_mean: 55.87640449438202
  episode_media: {}
  episode_reward_max: 17.59
  episode_reward_mean: 5.191516853932589
  episode_reward_min: -0.5600000000000003
  episodes_this_iter: 178
  episodes_total: 94600
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.026301320346005
          entropy_coeff: 0.01
          kl: 0.01493500022497062
          policy_loss: -0.07313873727729019
          total_loss: 0.07789827655375398
          vf_explained_var: 0.9432740807533264
          vf_loss: 0.1372762289303112
    num_agent_steps_sampled: 4787994
    num_agent_steps_trained: 4787994
    num_steps_sampled: 4787994
    num_steps_trained: 4787994
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,539,271815,4787994,5.19152,17.59,-0.56,55.8764




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4797990
  custom_metrics: {}
  date: 2021-11-21_22-27-59
  done: false
  episode_len_mean: 55.34615384615385
  episode_media: {}
  episode_reward_max: 15.630000000000006
  episode_reward_mean: 5.157967032967038
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 182
  episodes_total: 94782
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0204462428887684
          entropy_coeff: 0.01
          kl: 0.014991155655642267
          policy_loss: -0.06861311094837447
          total_loss: 0.08306717306316233
          vf_explained_var: 0.9324744343757629
          vf_loss: 0.13773301917794029
    num_agent_steps_sampled: 4797990
    num_agent_steps_trained: 4797990
    num_steps_sampled: 4797990
    num_steps_trained: 479

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,540,272359,4797990,5.15797,15.63,-0.5,55.3462




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4807986
  custom_metrics: {}
  date: 2021-11-21_22-37-03
  done: false
  episode_len_mean: 55.58659217877095
  episode_media: {}
  episode_reward_max: 17.62
  episode_reward_mean: 5.229441340782127
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 179
  episodes_total: 94961
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.000171615237213
          entropy_coeff: 0.01
          kl: 0.01563458809804132
          policy_loss: -0.062125926372939656
          total_loss: 0.11528759466180921
          vf_explained_var: 0.918452799320221
          vf_loss: 0.1617976903569432
    num_agent_steps_sampled: 4807986
    num_agent_steps_trained: 4807986
    num_steps_sampled: 4807986
    num_steps_trained: 4807986
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,541,272903,4807986,5.22944,17.62,-0.53,55.5866




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4817982
  custom_metrics: {}
  date: 2021-11-21_22-46-07
  done: false
  episode_len_mean: 55.175824175824175
  episode_media: {}
  episode_reward_max: 19.629999999999992
  episode_reward_mean: 5.361153846153851
  episode_reward_min: -0.6300000000000003
  episodes_this_iter: 182
  episodes_total: 95143
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.038417901116681
          entropy_coeff: 0.01
          kl: 0.014480128447194072
          policy_loss: -0.07461159019444037
          total_loss: 0.07334218591300544
          vf_explained_var: 0.9340832829475403
          vf_loss: 0.1353504108320211
    num_agent_steps_sampled: 4817982
    num_agent_steps_trained: 4817982
    num_steps_sampled: 4817982
    num_steps_trained: 4817

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,542,273447,4817982,5.36115,19.63,-0.63,55.1758


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4827978
  custom_metrics: {}
  date: 2021-11-21_22-54-58
  done: false
  episode_len_mean: 55.73184357541899
  episode_media: {}
  episode_reward_max: 17.590000000000003
  episode_reward_mean: 5.53011173184358
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 179
  episodes_total: 95322
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0126301727620475
          entropy_coeff: 0.01
          kl: 0.014921176137167872
          policy_loss: -0.0702027494640642
          total_loss: 0.09038509299953075
          vf_explained_var: 0.9423823952674866
          vf_loss: 0.1467218378219327
    num_agent_steps_sampled: 4827978
    num_agent_steps_trained: 4827978
    num_steps_sampled: 4827978
    num_steps_trained: 482797

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,543,273978,4827978,5.53011,17.59,-0.51,55.7318




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4837974
  custom_metrics: {}
  date: 2021-11-21_23-03-59
  done: false
  episode_len_mean: 56.40677966101695
  episode_media: {}
  episode_reward_max: 19.589999999999996
  episode_reward_mean: 5.43203389830509
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 177
  episodes_total: 95499
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0237041960519
          entropy_coeff: 0.01
          kl: 0.016270225413492823
          policy_loss: -0.06739758193400347
          total_loss: 0.1081351223598651
          vf_explained_var: 0.9430162310600281
          vf_loss: 0.15870413844497971
    num_agent_steps_sampled: 4837974
    num_agent_steps_trained: 4837974
    num_steps_sampled: 4837974
    num_steps_trained: 4837974


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,544,274519,4837974,5.43203,19.59,-0.5,56.4068


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4847970
  custom_metrics: {}
  date: 2021-11-21_23-12-51
  done: false
  episode_len_mean: 55.35
  episode_media: {}
  episode_reward_max: 15.630000000000006
  episode_reward_mean: 5.435611111111116
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 180
  episodes_total: 95679
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.010287365975629
          entropy_coeff: 0.01
          kl: 0.01504188248546603
          policy_loss: -0.06937123165938186
          total_loss: 0.10776044014532397
          vf_explained_var: 0.936235249042511
          vf_loss: 0.16296725537745754
    num_agent_steps_sampled: 4847970
    num_agent_steps_trained: 4847970
    num_steps_sampled: 4847970
    num_steps_trained: 4847970
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,545,275051,4847970,5.43561,15.63,-0.5,55.35




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4857966
  custom_metrics: {}
  date: 2021-11-21_23-21-57
  done: false
  episode_len_mean: 54.52173913043478
  episode_media: {}
  episode_reward_max: 13.610000000000007
  episode_reward_mean: 5.517554347826092
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 184
  episodes_total: 95863
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0215483945057575
          entropy_coeff: 0.01
          kl: 0.014868530535251431
          policy_loss: -0.06871909098576907
          total_loss: 0.08570553435578528
          vf_explained_var: 0.9493282437324524
          vf_loss: 0.14076773626767247
    num_agent_steps_sampled: 4857966
    num_agent_steps_trained: 4857966
    num_steps_sampled: 4857966
    num_steps_trained: 48

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,546,275597,4857966,5.51755,13.61,-0.48,54.5217




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4867962
  custom_metrics: {}
  date: 2021-11-21_23-31-08
  done: false
  episode_len_mean: 54.53804347826087
  episode_media: {}
  episode_reward_max: 15.620000000000006
  episode_reward_mean: 5.023967391304352
  episode_reward_min: -0.5600000000000003
  episodes_this_iter: 184
  episodes_total: 96047
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0645932915938427
          entropy_coeff: 0.01
          kl: 0.014588750117109969
          policy_loss: -0.06913067350070784
          total_loss: 0.07560231995244159
          vf_explained_var: 0.9312735199928284
          vf_loss: 0.13214392930981758
    num_agent_steps_sampled: 4867962
    num_agent_steps_trained: 4867962
    num_steps_sampled: 4867962
    num_steps_trained: 486

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,547,276147,4867962,5.02397,15.62,-0.56,54.538




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4877958
  custom_metrics: {}
  date: 2021-11-21_23-40-27
  done: false
  episode_len_mean: 55.21546961325967
  episode_media: {}
  episode_reward_max: 19.589999999999986
  episode_reward_mean: 5.244696132596689
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 181
  episodes_total: 96228
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.038540347512946
          entropy_coeff: 0.01
          kl: 0.014560973662563953
          policy_loss: -0.07416402394041852
          total_loss: 0.08132506103315444
          vf_explained_var: 0.9345823526382446
          vf_loss: 0.14270276969521264
    num_agent_steps_sampled: 4877958
    num_agent_steps_trained: 4877958
    num_steps_sampled: 4877958
    num_steps_trained: 4877

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,548,276706,4877958,5.2447,19.59,-0.51,55.2155


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4887954
  custom_metrics: {}
  date: 2021-11-21_23-49-17
  done: false
  episode_len_mean: 55.4
  episode_media: {}
  episode_reward_max: 15.660000000000005
  episode_reward_mean: 4.977722222222226
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 180
  episodes_total: 96408
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.079494252262345
          entropy_coeff: 0.01
          kl: 0.01458553990402748
          policy_loss: -0.07335825939637661
          total_loss: 0.062002793999915255
          vf_explained_var: 0.9428742527961731
          vf_loss: 0.12292831113363754
    num_agent_steps_sampled: 4887954
    num_agent_steps_trained: 4887954
    num_steps_sampled: 4887954
    num_steps_trained: 4887954
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,549,277236,4887954,4.97772,15.66,-0.49,55.4


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4897950
  custom_metrics: {}
  date: 2021-11-21_23-58-09
  done: false
  episode_len_mean: 54.83516483516483
  episode_media: {}
  episode_reward_max: 17.60999999999998
  episode_reward_mean: 5.151703296703301
  episode_reward_min: -0.5200000000000004
  episodes_this_iter: 182
  episodes_total: 96590
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.067940811220422
          entropy_coeff: 0.01
          kl: 0.015144861873759264
          policy_loss: -0.07070221619775023
          total_loss: 0.0774910659193563
          vf_explained_var: 0.9457582235336304
          vf_loss: 0.13437080064650073
    num_agent_steps_sampled: 4897950
    num_agent_steps_trained: 4897950
    num_steps_sampled: 4897950
    num_steps_trained: 489795

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,550,277769,4897950,5.1517,17.61,-0.52,54.8352




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4907946
  custom_metrics: {}
  date: 2021-11-22_00-07-29
  done: false
  episode_len_mean: 55.13812154696133
  episode_media: {}
  episode_reward_max: 19.65999999999999
  episode_reward_mean: 5.169226519337021
  episode_reward_min: -0.6100000000000003
  episodes_this_iter: 181
  episodes_total: 96771
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.078392409296878
          entropy_coeff: 0.01
          kl: 0.014930904625651697
          policy_loss: -0.06684870715339841
          total_loss: 0.08103046296076805
          vf_explained_var: 0.9398439526557922
          vf_loss: 0.13464862574181746
    num_agent_steps_sampled: 4907946
    num_agent_steps_trained: 4907946
    num_steps_sampled: 4907946
    num_steps_trained: 49079

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,551,278329,4907946,5.16923,19.66,-0.61,55.1381




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4917942
  custom_metrics: {}
  date: 2021-11-22_00-16-30
  done: false
  episode_len_mean: 55.28021978021978
  episode_media: {}
  episode_reward_max: 15.590000000000007
  episode_reward_mean: 5.014945054945059
  episode_reward_min: -0.6400000000000003
  episodes_this_iter: 182
  episodes_total: 96953
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0552289284137357
          entropy_coeff: 0.01
          kl: 0.015793580189338276
          policy_loss: -0.06760424404434805
          total_loss: 0.09743495909057753
          vf_explained_var: 0.9371140599250793
          vf_loss: 0.14961174050482537
    num_agent_steps_sampled: 4917942
    num_agent_steps_trained: 4917942
    num_steps_sampled: 4917942
    num_steps_trained: 491

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,552,278869,4917942,5.01495,15.59,-0.64,55.2802


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4927938
  custom_metrics: {}
  date: 2021-11-22_00-25-21
  done: false
  episode_len_mean: 54.675824175824175
  episode_media: {}
  episode_reward_max: 15.610000000000007
  episode_reward_mean: 4.681208791208795
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 182
  episodes_total: 97135
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.088294063634183
          entropy_coeff: 0.01
          kl: 0.014014975193131352
          policy_loss: -0.073183256388051
          total_loss: 0.05662458460806138
          vf_explained_var: 0.9397945404052734
          vf_loss: 0.11876291599756772
    num_agent_steps_sampled: 4927938
    num_agent_steps_trained: 4927938
    num_steps_sampled: 4927938
    num_steps_trained: 49279

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,553,279400,4927938,4.68121,15.61,-0.53,54.6758


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4937934
  custom_metrics: {}
  date: 2021-11-22_00-34-11
  done: false
  episode_len_mean: 55.53333333333333
  episode_media: {}
  episode_reward_max: 17.580000000000002
  episode_reward_mean: 4.961444444444449
  episode_reward_min: -0.6300000000000003
  episodes_this_iter: 180
  episodes_total: 97315
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.071085844078217
          entropy_coeff: 0.01
          kl: 0.01544418496525428
          policy_loss: -0.0666668844463422
          total_loss: 0.08297739188683262
          vf_explained_var: 0.9196906685829163
          vf_loss: 0.13517135007673478
    num_agent_steps_sampled: 4937934
    num_agent_steps_trained: 4937934
    num_steps_sampled: 4937934
    num_steps_trained: 493793

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,554,279930,4937934,4.96144,17.58,-0.63,55.5333




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4947930
  custom_metrics: {}
  date: 2021-11-22_00-43-28
  done: false
  episode_len_mean: 54.988950276243095
  episode_media: {}
  episode_reward_max: 17.63
  episode_reward_mean: 5.195856353591164
  episode_reward_min: -0.46000000000000024
  episodes_this_iter: 181
  episodes_total: 97496
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0259331025272966
          entropy_coeff: 0.01
          kl: 0.01468584764843218
          policy_loss: -0.07223874558214857
          total_loss: 0.08559487630440103
          vf_explained_var: 0.9419647455215454
          vf_loss: 0.1446367547082449
    num_agent_steps_sampled: 4947930
    num_agent_steps_trained: 4947930
    num_steps_sampled: 4947930
    num_steps_trained: 4947930
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,555,280487,4947930,5.19586,17.63,-0.46,54.989




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4957926
  custom_metrics: {}
  date: 2021-11-22_00-52-32
  done: false
  episode_len_mean: 54.71739130434783
  episode_media: {}
  episode_reward_max: 17.619999999999997
  episode_reward_mean: 4.756521739130439
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 184
  episodes_total: 97680
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.100419657752217
          entropy_coeff: 0.01
          kl: 0.014608865974021891
          policy_loss: -0.06746124408835377
          total_loss: 0.07398530134548006
          vf_explained_var: 0.9360494613647461
          vf_loss: 0.1291699180496393
    num_agent_steps_sampled: 4957926
    num_agent_steps_trained: 4957926
    num_steps_sampled: 4957926
    num_steps_trained: 4957

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,556,281031,4957926,4.75652,17.62,-0.49,54.7174




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4967922
  custom_metrics: {}
  date: 2021-11-22_01-01-42
  done: false
  episode_len_mean: 54.54945054945055
  episode_media: {}
  episode_reward_max: 17.689999999999998
  episode_reward_mean: 5.233241758241762
  episode_reward_min: -0.5700000000000003
  episodes_this_iter: 182
  episodes_total: 97862
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0672979072633995
          entropy_coeff: 0.01
          kl: 0.014938494866909026
          policy_loss: -0.07219338299421735
          total_loss: 0.06766234743967559
          vf_explained_var: 0.932979166507721
          vf_loss: 0.12649694972925907
    num_agent_steps_sampled: 4967922
    num_agent_steps_trained: 4967922
    num_steps_sampled: 4967922
    num_steps_trained: 4967

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,557,281581,4967922,5.23324,17.69,-0.57,54.5495


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4977918
  custom_metrics: {}
  date: 2021-11-22_01-10-34
  done: false
  episode_len_mean: 54.78142076502732
  episode_media: {}
  episode_reward_max: 17.629999999999992
  episode_reward_mean: 4.696448087431698
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 183
  episodes_total: 98045
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.096775496939579
          entropy_coeff: 0.01
          kl: 0.015012916787490777
          policy_loss: -0.06856794741639007
          total_loss: 0.07633675420131412
          vf_explained_var: 0.9266807436943054
          vf_loss: 0.13167115415205213
    num_agent_steps_sampled: 4977918
    num_agent_steps_trained: 4977918
    num_steps_sampled: 4977918
    num_steps_trained: 4977

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,558,282113,4977918,4.69645,17.63,-0.53,54.7814




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4987914
  custom_metrics: {}
  date: 2021-11-22_01-19-35
  done: false
  episode_len_mean: 55.13259668508287
  episode_media: {}
  episode_reward_max: 13.56000000000001
  episode_reward_mean: 4.729889502762435
  episode_reward_min: -0.5800000000000003
  episodes_this_iter: 181
  episodes_total: 98226
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0652557463052283
          entropy_coeff: 0.01
          kl: 0.01432984306504529
          policy_loss: -0.0689034445839785
          total_loss: 0.06888787875090777
          vf_explained_var: 0.9365155100822449
          vf_loss: 0.1257987049570958
    num_agent_steps_sampled: 4987914
    num_agent_steps_trained: 4987914
    num_steps_sampled: 4987914
    num_steps_trained: 4987914

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,559,282654,4987914,4.72989,13.56,-0.58,55.1326


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 4997910
  custom_metrics: {}
  date: 2021-11-22_01-28-24
  done: false
  episode_len_mean: 56.26815642458101
  episode_media: {}
  episode_reward_max: 15.630000000000006
  episode_reward_mean: 5.023128491620116
  episode_reward_min: -0.6100000000000003
  episodes_this_iter: 179
  episodes_total: 98405
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.077030060951968
          entropy_coeff: 0.01
          kl: 0.015163585849701525
          policy_loss: -0.07641300513365408
          total_loss: 0.08029643088952561
          vf_explained_var: 0.9307213425636292
          vf_loss: 0.14293519032092383
    num_agent_steps_sampled: 4997910
    num_agent_steps_trained: 4997910
    num_steps_sampled: 4997910
    num_steps_trained: 4997

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,560,283183,4997910,5.02313,15.63,-0.61,56.2682




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5007906
  custom_metrics: {}
  date: 2021-11-22_01-37-40
  done: false
  episode_len_mean: 55.327777777777776
  episode_media: {}
  episode_reward_max: 13.590000000000005
  episode_reward_mean: 5.178666666666671
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 180
  episodes_total: 98585
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0638184063405878
          entropy_coeff: 0.01
          kl: 0.0149890617910007
          policy_loss: -0.0712766948897286
          total_loss: 0.08767528354731048
          vf_explained_var: 0.9420664310455322
          vf_loss: 0.14544320449655704
    num_agent_steps_sampled: 5007906
    num_agent_steps_trained: 5007906
    num_steps_sampled: 5007906
    num_steps_trained: 50079

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,561,283739,5007906,5.17867,13.59,-0.51,55.3278




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5017902
  custom_metrics: {}
  date: 2021-11-22_01-46-42
  done: false
  episode_len_mean: 55.33701657458563
  episode_media: {}
  episode_reward_max: 15.530000000000008
  episode_reward_mean: 4.713259668508292
  episode_reward_min: -0.5800000000000003
  episodes_this_iter: 181
  episodes_total: 98766
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.079499497279585
          entropy_coeff: 0.01
          kl: 0.015346984924540999
          policy_loss: -0.0720507785875851
          total_loss: 0.07145817773301226
          vf_explained_var: 0.9258275032043457
          vf_loss: 0.1293416001244886
    num_agent_steps_sampled: 5017902
    num_agent_steps_trained: 5017902
    num_steps_sampled: 5017902
    num_steps_trained: 501790

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,562,284281,5017902,4.71326,15.53,-0.58,55.337




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5027898
  custom_metrics: {}
  date: 2021-11-22_01-55-45
  done: false
  episode_len_mean: 55.43888888888889
  episode_media: {}
  episode_reward_max: 17.57
  episode_reward_mean: 5.011388888888893
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 180
  episodes_total: 98946
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0371680565627224
          entropy_coeff: 0.01
          kl: 0.015259883577545225
          policy_loss: -0.06716721529869263
          total_loss: 0.11571304523382847
          vf_explained_var: 0.9252233505249023
          vf_loss: 0.1684880172355983
    num_agent_steps_sampled: 5027898
    num_agent_steps_trained: 5027898
    num_steps_sampled: 5027898
    num_steps_trained: 5027898
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,563,284824,5027898,5.01139,17.57,-0.51,55.4389


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5037894
  custom_metrics: {}
  date: 2021-11-22_02-04-34
  done: false
  episode_len_mean: 56.67231638418079
  episode_media: {}
  episode_reward_max: 17.590000000000003
  episode_reward_mean: 5.093107344632772
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 177
  episodes_total: 99123
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0716832888892376
          entropy_coeff: 0.01
          kl: 0.015734721988596233
          policy_loss: -0.06854801071929827
          total_loss: 0.0883976979568345
          vf_explained_var: 0.9225409626960754
          vf_loss: 0.14181687750191574
    num_agent_steps_sampled: 5037894
    num_agent_steps_trained: 5037894
    num_steps_sampled: 5037894
    num_steps_trained: 503

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,564,285353,5037894,5.09311,17.59,-0.49,56.6723




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5047890
  custom_metrics: {}
  date: 2021-11-22_02-13-37
  done: false
  episode_len_mean: 56.46590909090909
  episode_media: {}
  episode_reward_max: 15.550000000000008
  episode_reward_mean: 4.974488636363641
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 176
  episodes_total: 99299
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0705346309995076
          entropy_coeff: 0.01
          kl: 0.015254393811308786
          policy_loss: -0.07273538051111493
          total_loss: 0.0913007697507866
          vf_explained_var: 0.9256713390350342
          vf_loss: 0.14999007976101525
    num_agent_steps_sampled: 5047890
    num_agent_steps_trained: 5047890
    num_steps_sampled: 5047890
    num_steps_trained: 5047

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,565,285897,5047890,4.97449,15.55,-0.51,56.4659




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5057886
  custom_metrics: {}
  date: 2021-11-22_02-22-54
  done: false
  episode_len_mean: 55.26519337016575
  episode_media: {}
  episode_reward_max: 17.58
  episode_reward_mean: 4.870607734806633
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 181
  episodes_total: 99480
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.060622893208002
          entropy_coeff: 0.01
          kl: 0.014188026351622139
          policy_loss: -0.06404947203504652
          total_loss: 0.07732465294700953
          vf_explained_var: 0.9181507229804993
          vf_loss: 0.12965825529568958
    num_agent_steps_sampled: 5057886
    num_agent_steps_trained: 5057886
    num_steps_sampled: 5057886
    num_steps_trained: 5057886
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,566,286453,5057886,4.87061,17.58,-0.53,55.2652


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5067882
  custom_metrics: {}
  date: 2021-11-22_02-31-47
  done: false
  episode_len_mean: 54.62841530054645
  episode_media: {}
  episode_reward_max: 15.590000000000009
  episode_reward_mean: 4.875846994535524
  episode_reward_min: -0.4800000000000002
  episodes_this_iter: 183
  episodes_total: 99663
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0487172236404265
          entropy_coeff: 0.01
          kl: 0.014045740488840589
          policy_loss: -0.07005157562732081
          total_loss: 0.06961162061219599
          vf_explained_var: 0.9383404850959778
          vf_loss: 0.1281524143461325
    num_agent_steps_sampled: 5067882
    num_agent_steps_trained: 5067882
    num_steps_sampled: 5067882
    num_steps_trained: 5067

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,567,286986,5067882,4.87585,15.59,-0.48,54.6284


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5077878
  custom_metrics: {}
  date: 2021-11-22_02-40-38
  done: false
  episode_len_mean: 55.74860335195531
  episode_media: {}
  episode_reward_max: 15.640000000000006
  episode_reward_mean: 4.874301675977658
  episode_reward_min: -0.5500000000000003
  episodes_this_iter: 179
  episodes_total: 99842
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.044353840509093
          entropy_coeff: 0.01
          kl: 0.01506762577341756
          policy_loss: -0.07032364738884518
          total_loss: 0.0764516898186175
          vf_explained_var: 0.9342193007469177
          vf_loss: 0.13289293917372869
    num_agent_steps_sampled: 5077878
    num_agent_steps_trained: 5077878
    num_steps_sampled: 5077878
    num_steps_trained: 507787

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,568,287517,5077878,4.8743,15.64,-0.55,55.7486


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5087874
  custom_metrics: {}
  date: 2021-11-22_02-49-30
  done: false
  episode_len_mean: 55.41436464088398
  episode_media: {}
  episode_reward_max: 15.480000000000008
  episode_reward_mean: 4.972817679558015
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 181
  episodes_total: 100023
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0409510204830323
          entropy_coeff: 0.01
          kl: 0.014815826181780609
          policy_loss: -0.07164509809558531
          total_loss: 0.07267448245245424
          vf_explained_var: 0.9062899947166443
          vf_loss: 0.13097678473645663
    num_agent_steps_sampled: 5087874
    num_agent_steps_trained: 5087874
    num_steps_sampled: 5087874
    num_steps_trained: 50

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,569,288049,5087874,4.97282,15.48,-0.52,55.4144




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5097870
  custom_metrics: {}
  date: 2021-11-22_02-58-48
  done: false
  episode_len_mean: 54.67213114754098
  episode_media: {}
  episode_reward_max: 15.640000000000006
  episode_reward_mean: 5.335136612021863
  episode_reward_min: -0.5800000000000003
  episodes_this_iter: 183
  episodes_total: 100206
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.012044188511898
          entropy_coeff: 0.01
          kl: 0.015066703520458318
          policy_loss: -0.07141069560036294
          total_loss: 0.08603399635970613
          vf_explained_var: 0.9298755526542664
          vf_loss: 0.1432412983320399
    num_agent_steps_sampled: 5097870
    num_agent_steps_trained: 5097870
    num_steps_sampled: 5097870
    num_steps_trained: 5097

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,570,288607,5097870,5.33514,15.64,-0.58,54.6721




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5107866
  custom_metrics: {}
  date: 2021-11-22_03-07-50
  done: false
  episode_len_mean: 54.76923076923077
  episode_media: {}
  episode_reward_max: 19.67000000000001
  episode_reward_mean: 4.916923076923081
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 182
  episodes_total: 100388
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0448204884328036
          entropy_coeff: 0.01
          kl: 0.015780551941480446
          policy_loss: -0.0643119687209307
          total_loss: 0.10019052994252213
          vf_explained_var: 0.9341119527816772
          vf_loss: 0.14900063288530865
    num_agent_steps_sampled: 5107866
    num_agent_steps_trained: 5107866
    num_steps_sampled: 5107866
    num_steps_trained: 5107

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,571,289149,5107866,4.91692,19.67,-0.53,54.7692


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5117862
  custom_metrics: {}
  date: 2021-11-22_03-16-44
  done: false
  episode_len_mean: 53.82258064516129
  episode_media: {}
  episode_reward_max: 17.569999999999975
  episode_reward_mean: 5.134784946236563
  episode_reward_min: -0.47000000000000025
  episodes_this_iter: 186
  episodes_total: 100574
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0364712553091318
          entropy_coeff: 0.01
          kl: 0.015351652656797103
          policy_loss: -0.06668080074563419
          total_loss: 0.09620620980193863
          vf_explained_var: 0.9267486929893494
          vf_loss: 0.14827873914859754
    num_agent_steps_sampled: 5117862
    num_agent_steps_trained: 5117862
    num_steps_sampled: 5117862
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,572,289683,5117862,5.13478,17.57,-0.47,53.8226




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5127858
  custom_metrics: {}
  date: 2021-11-22_03-25-51
  done: false
  episode_len_mean: 53.806451612903224
  episode_media: {}
  episode_reward_max: 15.610000000000007
  episode_reward_mean: 4.737258064516133
  episode_reward_min: -0.5800000000000003
  episodes_this_iter: 186
  episodes_total: 100760
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0477752874414605
          entropy_coeff: 0.01
          kl: 0.014825607948072674
          policy_loss: -0.07101170190807449
          total_loss: 0.08786792892529614
          vf_explained_var: 0.9187474250793457
          vf_loss: 0.1455827947557309
    num_agent_steps_sampled: 5127858
    num_agent_steps_trained: 5127858
    num_steps_sampled: 5127858
    num_steps_trained: 51

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,573,290230,5127858,4.73726,15.61,-0.58,53.8065




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5137854
  custom_metrics: {}
  date: 2021-11-22_03-34-58
  done: false
  episode_len_mean: 53.623655913978496
  episode_media: {}
  episode_reward_max: 13.620000000000008
  episode_reward_mean: 4.911827956989252
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 186
  episodes_total: 100946
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.031036320436432
          entropy_coeff: 0.01
          kl: 0.015118604912822025
          policy_loss: -0.0712703100912496
          total_loss: 0.08804836163697831
          vf_explained_var: 0.9353691935539246
          vf_loss: 0.14518696136621154
    num_agent_steps_sampled: 5137854
    num_agent_steps_trained: 5137854
    num_steps_sampled: 5137854
    num_steps_trained: 513

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,574,290777,5137854,4.91183,13.62,-0.52,53.6237




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5147850
  custom_metrics: {}
  date: 2021-11-22_03-44-03
  done: false
  episode_len_mean: 52.94708994708995
  episode_media: {}
  episode_reward_max: 19.629999999999995
  episode_reward_mean: 4.899365079365083
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 189
  episodes_total: 101135
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.05452205225167
          entropy_coeff: 0.01
          kl: 0.014539427165837303
          policy_loss: -0.07083785438883379
          total_loss: 0.09390032705759548
          vf_explained_var: 0.939069926738739
          vf_loss: 0.15216076821144894
    num_agent_steps_sampled: 5147850
    num_agent_steps_trained: 5147850
    num_steps_sampled: 5147850
    num_steps_trained: 5147

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,575,291322,5147850,4.89937,19.63,-0.48,52.9471


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5157846
  custom_metrics: {}
  date: 2021-11-22_03-52-54
  done: false
  episode_len_mean: 54.59782608695652
  episode_media: {}
  episode_reward_max: 13.630000000000006
  episode_reward_mean: 4.840760869565221
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 184
  episodes_total: 101319
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0552374958992004
          entropy_coeff: 0.01
          kl: 0.016021268128173095
          policy_loss: -0.07081846196752195
          total_loss: 0.09871720911989147
          vf_explained_var: 0.9186561703681946
          vf_loss: 0.15358959370560152
    num_agent_steps_sampled: 5157846
    num_agent_steps_trained: 5157846
    num_steps_sampled: 5157846
    num_steps_trained: 51

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,576,291853,5157846,4.84076,13.63,-0.52,54.5978




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5167842
  custom_metrics: {}
  date: 2021-11-22_04-02-10
  done: false
  episode_len_mean: 54.36612021857923
  episode_media: {}
  episode_reward_max: 13.620000000000006
  episode_reward_mean: 4.934262295081973
  episode_reward_min: -0.5500000000000003
  episodes_this_iter: 183
  episodes_total: 101502
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0355897526185673
          entropy_coeff: 0.01
          kl: 0.015087932447286253
          policy_loss: -0.06768720262343449
          total_loss: 0.09107873939249558
          vf_explained_var: 0.9216902852058411
          vf_loss: 0.1447496422976031
    num_agent_steps_sampled: 5167842
    num_agent_steps_trained: 5167842
    num_steps_sampled: 5167842
    num_steps_trained: 516

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,577,292408,5167842,4.93426,13.62,-0.55,54.3661


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5177838
  custom_metrics: {}
  date: 2021-11-22_04-11-04
  done: false
  episode_len_mean: 54.108108108108105
  episode_media: {}
  episode_reward_max: 13.630000000000006
  episode_reward_mean: 4.937135135135139
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 185
  episodes_total: 101687
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0349669583590635
          entropy_coeff: 0.01
          kl: 0.014660868908629032
          policy_loss: -0.07467438972828723
          total_loss: 0.06573892931711493
          vf_explained_var: 0.9399524927139282
          vf_loss: 0.12736369569320233
    num_agent_steps_sampled: 5177838
    num_agent_steps_trained: 5177838
    num_steps_sampled: 5177838
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,578,292943,5177838,4.93714,13.63,-0.53,54.1081




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5187834
  custom_metrics: {}
  date: 2021-11-22_04-20-24
  done: false
  episode_len_mean: 54.21739130434783
  episode_media: {}
  episode_reward_max: 15.510000000000009
  episode_reward_mean: 4.962826086956526
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 184
  episodes_total: 101871
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.9894219139014861
          entropy_coeff: 0.01
          kl: 0.014791703714172428
          policy_loss: -0.06914070126324778
          total_loss: 0.0773416915173701
          vf_explained_var: 0.9336490631103516
          vf_loss: 0.1326792610867181
    num_agent_steps_sampled: 5187834
    num_agent_steps_trained: 5187834
    num_steps_sampled: 5187834
    num_steps_trained: 5187

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,579,293502,5187834,4.96283,15.51,-0.51,54.2174




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5197830
  custom_metrics: {}
  date: 2021-11-22_04-29-32
  done: false
  episode_len_mean: 54.6448087431694
  episode_media: {}
  episode_reward_max: 15.380000000000011
  episode_reward_mean: 5.298469945355196
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 183
  episodes_total: 102054
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.992257473004391
          entropy_coeff: 0.01
          kl: 0.014963483862792752
          policy_loss: -0.06826299364981522
          total_loss: 0.08972792724023997
          vf_explained_var: 0.9380208253860474
          vf_loss: 0.1438248080308044
    num_agent_steps_sampled: 5197830
    num_agent_steps_trained: 5197830
    num_steps_sampled: 5197830
    num_steps_trained: 5197

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,580,294050,5197830,5.29847,15.38,-0.48,54.6448




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5207826
  custom_metrics: {}
  date: 2021-11-22_04-38-34
  done: false
  episode_len_mean: 54.23913043478261
  episode_media: {}
  episode_reward_max: 19.609999999999978
  episode_reward_mean: 5.173043478260873
  episode_reward_min: -0.6000000000000003
  episodes_this_iter: 184
  episodes_total: 102238
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0171693518219223
          entropy_coeff: 0.01
          kl: 0.015091464418776741
          policy_loss: -0.06917197027974162
          total_loss: 0.0832379320131465
          vf_explained_var: 0.9304418563842773
          vf_loss: 0.13820135136280792
    num_agent_steps_sampled: 5207826
    num_agent_steps_trained: 5207826
    num_steps_sampled: 5207826
    num_steps_trained: 520

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,581,294593,5207826,5.17304,19.61,-0.6,54.2391




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5217822
  custom_metrics: {}
  date: 2021-11-22_04-47-49
  done: false
  episode_len_mean: 54.30434782608695
  episode_media: {}
  episode_reward_max: 17.57
  episode_reward_mean: 5.103967391304352
  episode_reward_min: -0.46000000000000024
  episodes_this_iter: 184
  episodes_total: 102422
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0130098724460983
          entropy_coeff: 0.01
          kl: 0.01598371274687391
          policy_loss: -0.06794231485255368
          total_loss: 0.09071356053265121
          vf_explained_var: 0.9107504487037659
          vf_loss: 0.14237307634879742
    num_agent_steps_sampled: 5217822
    num_agent_steps_trained: 5217822
    num_steps_sampled: 5217822
    num_steps_trained: 5217822
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,582,295147,5217822,5.10397,17.57,-0.46,54.3043




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5227818
  custom_metrics: {}
  date: 2021-11-22_04-56-51
  done: false
  episode_len_mean: 54.994535519125684
  episode_media: {}
  episode_reward_max: 15.48000000000001
  episode_reward_mean: 4.950601092896179
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 183
  episodes_total: 102605
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.020025741240107
          entropy_coeff: 0.01
          kl: 0.015142727471662893
          policy_loss: -0.06204314450036372
          total_loss: 0.09178655042589606
          vf_explained_var: 0.9256460666656494
          vf_loss: 0.13953292516563237
    num_agent_steps_sampled: 5227818
    num_agent_steps_trained: 5227818
    num_steps_sampled: 5227818
    num_steps_trained: 522

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,583,295690,5227818,4.9506,15.48,-0.51,54.9945


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5237814
  custom_metrics: {}
  date: 2021-11-22_05-05-41
  done: false
  episode_len_mean: 55.05524861878453
  episode_media: {}
  episode_reward_max: 17.580000000000005
  episode_reward_mean: 5.129447513812159
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 181
  episodes_total: 102786
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0169459957195572
          entropy_coeff: 0.01
          kl: 0.013907509891443723
          policy_loss: -0.06740926096173357
          total_loss: 0.0807089736341923
          vf_explained_var: 0.9309860467910767
          vf_loss: 0.13660464763611435
    num_agent_steps_sampled: 5237814
    num_agent_steps_trained: 5237814
    num_steps_sampled: 5237814
    num_steps_trained: 52

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,584,296220,5237814,5.12945,17.58,-0.48,55.0552




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5247810
  custom_metrics: {}
  date: 2021-11-22_05-14-47
  done: false
  episode_len_mean: 54.06521739130435
  episode_media: {}
  episode_reward_max: 13.600000000000007
  episode_reward_mean: 4.976630434782613
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 184
  episodes_total: 102970
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0228277045320793
          entropy_coeff: 0.01
          kl: 0.015090095830614264
          policy_loss: -0.06884952533332415
          total_loss: 0.08415646700500712
          vf_explained_var: 0.9311182498931885
          vf_loss: 0.13885714333021945
    num_agent_steps_sampled: 5247810
    num_agent_steps_trained: 5247810
    num_steps_sampled: 5247810
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,585,296765,5247810,4.97663,13.6,-0.49,54.0652




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5257806
  custom_metrics: {}
  date: 2021-11-22_05-23-54
  done: false
  episode_len_mean: 53.89784946236559
  episode_media: {}
  episode_reward_max: 13.640000000000006
  episode_reward_mean: 4.776075268817208
  episode_reward_min: -0.4800000000000002
  episodes_this_iter: 186
  episodes_total: 103156
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.02444282198048
          entropy_coeff: 0.01
          kl: 0.014295705399304023
          policy_loss: -0.06969966141132683
          total_loss: 0.07769130162037857
          vf_explained_var: 0.9313750863075256
          vf_loss: 0.13506798639465273
    num_agent_steps_sampled: 5257806
    num_agent_steps_trained: 5257806
    num_steps_sampled: 5257806
    num_steps_trained: 5257

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,586,297313,5257806,4.77608,13.64,-0.48,53.8978




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5267802
  custom_metrics: {}
  date: 2021-11-22_05-33-18
  done: false
  episode_len_mean: 54.54891304347826
  episode_media: {}
  episode_reward_max: 15.660000000000005
  episode_reward_mean: 4.8679347826087
  episode_reward_min: -0.5700000000000003
  episodes_this_iter: 184
  episodes_total: 103340
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0418123153558216
          entropy_coeff: 0.01
          kl: 0.014668560257670679
          policy_loss: -0.06856866017217225
          total_loss: 0.07405099500772942
          vf_explained_var: 0.9061076045036316
          vf_loss: 0.12962096301753664
    num_agent_steps_sampled: 5267802
    num_agent_steps_trained: 5267802
    num_steps_sampled: 5267802
    num_steps_trained: 5267

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,587,297877,5267802,4.86793,15.66,-0.57,54.5489


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5277798
  custom_metrics: {}
  date: 2021-11-22_05-42-12
  done: false
  episode_len_mean: 53.832432432432434
  episode_media: {}
  episode_reward_max: 13.560000000000008
  episode_reward_mean: 4.557405405405409
  episode_reward_min: -0.5600000000000003
  episodes_this_iter: 185
  episodes_total: 103525
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0503235189072098
          entropy_coeff: 0.01
          kl: 0.0143497980993122
          policy_loss: -0.06972740607241605
          total_loss: 0.06830431349634414
          vf_explained_var: 0.933291494846344
          vf_loss: 0.1258443191947685
    num_agent_steps_sampled: 5277798
    num_agent_steps_trained: 5277798
    num_steps_sampled: 5277798
    num_steps_trained: 52777

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,588,298411,5277798,4.55741,13.56,-0.56,53.8324




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5287794
  custom_metrics: {}
  date: 2021-11-22_05-51-18
  done: false
  episode_len_mean: 53.28191489361702
  episode_media: {}
  episode_reward_max: 15.620000000000006
  episode_reward_mean: 5.143936170212769
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 188
  episodes_total: 103713
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.040839179955333
          entropy_coeff: 0.01
          kl: 0.01524181755181653
          policy_loss: -0.07164869092300152
          total_loss: 0.09133976829441262
          vf_explained_var: 0.9400190711021423
          vf_loss: 0.14867408441269328
    num_agent_steps_sampled: 5287794
    num_agent_steps_trained: 5287794
    num_steps_sampled: 5287794
    num_steps_trained: 5287

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,589,298956,5287794,5.14394,15.62,-0.54,53.2819




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5297790
  custom_metrics: {}
  date: 2021-11-22_06-00-24
  done: false
  episode_len_mean: 53.5668449197861
  episode_media: {}
  episode_reward_max: 15.490000000000009
  episode_reward_mean: 4.967005347593587
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 187
  episodes_total: 103900
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.021815138935564
          entropy_coeff: 0.01
          kl: 0.01514392630657703
          policy_loss: -0.06934854431962546
          total_loss: 0.09464364114813008
          vf_explained_var: 0.9211416244506836
          vf_loss: 0.14971057955684314
    num_agent_steps_sampled: 5297790
    num_agent_steps_trained: 5297790
    num_steps_sampled: 5297790
    num_steps_trained: 52977

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,590,299502,5297790,4.96701,15.49,-0.53,53.5668




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5307786
  custom_metrics: {}
  date: 2021-11-22_06-09-32
  done: false
  episode_len_mean: 53.91891891891892
  episode_media: {}
  episode_reward_max: 13.620000000000006
  episode_reward_mean: 5.057621621621625
  episode_reward_min: -0.46000000000000024
  episodes_this_iter: 185
  episodes_total: 104085
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.044122482423323
          entropy_coeff: 0.01
          kl: 0.014586175502068366
          policy_loss: -0.07415641232097282
          total_loss: 0.0784289931628574
          vf_explained_var: 0.9350923299789429
          vf_loss: 0.13979749819366674
    num_agent_steps_sampled: 5307786
    num_agent_steps_trained: 5307786
    num_steps_sampled: 5307786
    num_steps_trained: 530

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,591,300050,5307786,5.05762,13.62,-0.46,53.9189




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5317782
  custom_metrics: {}
  date: 2021-11-22_06-18-53
  done: false
  episode_len_mean: 52.4
  episode_media: {}
  episode_reward_max: 15.520000000000008
  episode_reward_mean: 5.065894736842108
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 190
  episodes_total: 104275
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0347272628282447
          entropy_coeff: 0.01
          kl: 0.01434544953279092
          policy_loss: -0.06371547416226922
          total_loss: 0.0864937984913379
          vf_explained_var: 0.935451090335846
          vf_loss: 0.13787581703848165
    num_agent_steps_sampled: 5317782
    num_agent_steps_trained: 5317782
    num_steps_sampled: 5317782
    num_steps_trained: 5317782
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,592,300611,5317782,5.06589,15.52,-0.48,52.4


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5327778
  custom_metrics: {}
  date: 2021-11-22_06-27-47
  done: false
  episode_len_mean: 54.47826086956522
  episode_media: {}
  episode_reward_max: 17.51
  episode_reward_mean: 5.551467391304351
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 184
  episodes_total: 104459
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.028561302923773
          entropy_coeff: 0.01
          kl: 0.014368954906630864
          policy_loss: -0.06496892735672026
          total_loss: 0.10605645021905839
          vf_explained_var: 0.9363157749176025
          vf_loss: 0.15857671475193152
    num_agent_steps_sampled: 5327778
    num_agent_steps_trained: 5327778
    num_steps_sampled: 5327778
    num_steps_trained: 5327778
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,593,301145,5327778,5.55147,17.51,-0.5,54.4783




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5337774
  custom_metrics: {}
  date: 2021-11-22_06-36-55
  done: false
  episode_len_mean: 53.74193548387097
  episode_media: {}
  episode_reward_max: 15.600000000000007
  episode_reward_mean: 5.03139784946237
  episode_reward_min: -0.5500000000000003
  episodes_this_iter: 186
  episodes_total: 104645
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0335970935572583
          entropy_coeff: 0.01
          kl: 0.014370178610965555
          policy_loss: -0.07193431004456367
          total_loss: 0.0746522419613526
          vf_explained_var: 0.9310120344161987
          vf_loss: 0.1341854585686242
    num_agent_steps_sampled: 5337774
    num_agent_steps_trained: 5337774
    num_steps_sampled: 5337774
    num_steps_trained: 53377

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,594,301694,5337774,5.0314,15.6,-0.55,53.7419




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5347770
  custom_metrics: {}
  date: 2021-11-22_06-46-03
  done: false
  episode_len_mean: 53.8054054054054
  episode_media: {}
  episode_reward_max: 15.700000000000005
  episode_reward_mean: 5.308000000000004
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 185
  episodes_total: 104830
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.027216574130767
          entropy_coeff: 0.01
          kl: 0.014586730961613954
          policy_loss: -0.07253381861792113
          total_loss: 0.08879385555710066
          vf_explained_var: 0.9334452152252197
          vf_loss: 0.1483694433114944
    num_agent_steps_sampled: 5347770
    num_agent_steps_trained: 5347770
    num_steps_sampled: 5347770
    num_steps_trained: 53477

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,595,302242,5347770,5.308,15.7,-0.51,53.8054




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5357766
  custom_metrics: {}
  date: 2021-11-22_06-55-10
  done: false
  episode_len_mean: 54.59239130434783
  episode_media: {}
  episode_reward_max: 17.450000000000003
  episode_reward_mean: 5.1589130434782655
  episode_reward_min: -0.5000000000000003
  episodes_this_iter: 184
  episodes_total: 105014
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0336307535927935
          entropy_coeff: 0.01
          kl: 0.014489056759182953
          policy_loss: -0.06913475691791506
          total_loss: 0.09116237792259199
          vf_explained_var: 0.9308554530143738
          vf_loss: 0.1476255585660178
    num_agent_steps_sampled: 5357766
    num_agent_steps_trained: 5357766
    num_steps_sampled: 5357766
    num_steps_trained: 53

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,596,302788,5357766,5.15891,17.45,-0.5,54.5924


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5367762
  custom_metrics: {}
  date: 2021-11-22_07-04-05
  done: false
  episode_len_mean: 53.774193548387096
  episode_media: {}
  episode_reward_max: 15.640000000000004
  episode_reward_mean: 4.998548387096778
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 186
  episodes_total: 105200
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0547234140246746
          entropy_coeff: 0.01
          kl: 0.013988305310868901
          policy_loss: -0.07086513997503836
          total_loss: 0.08240081311546181
          vf_explained_var: 0.922469973564148
          vf_loss: 0.14194607744470283
    num_agent_steps_sampled: 5367762
    num_agent_steps_trained: 5367762
    num_steps_sampled: 5367762
    num_steps_trained: 53

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,597,303323,5367762,4.99855,15.64,-0.54,53.7742




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5377758
  custom_metrics: {}
  date: 2021-11-22_07-13-24
  done: false
  episode_len_mean: 53.497297297297294
  episode_media: {}
  episode_reward_max: 13.780000000000003
  episode_reward_mean: 5.107837837837842
  episode_reward_min: -0.4800000000000002
  episodes_this_iter: 185
  episodes_total: 105385
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0137795587859477
          entropy_coeff: 0.01
          kl: 0.014717228703581412
          policy_loss: -0.0711899080436586
          total_loss: 0.08574147690636724
          vf_explained_var: 0.9370403289794922
          vf_loss: 0.14354149217822076
    num_agent_steps_sampled: 5377758
    num_agent_steps_trained: 5377758
    num_steps_sampled: 5377758
    num_steps_trained: 53

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,598,303882,5377758,5.10784,13.78,-0.48,53.4973




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5387754
  custom_metrics: {}
  date: 2021-11-22_07-22-30
  done: false
  episode_len_mean: 54.0427807486631
  episode_media: {}
  episode_reward_max: 17.69
  episode_reward_mean: 5.022673796791449
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 187
  episodes_total: 105572
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0217127136078226
          entropy_coeff: 0.01
          kl: 0.015802472553141576
          policy_loss: -0.06865143571313295
          total_loss: 0.09961518337414824
          vf_explained_var: 0.9245328903198242
          vf_loss: 0.1524837364749347
    num_agent_steps_sampled: 5387754
    num_agent_steps_trained: 5387754
    num_steps_sampled: 5387754
    num_steps_trained: 5387754
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,599,304428,5387754,5.02267,17.69,-0.54,54.0428




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5397750
  custom_metrics: {}
  date: 2021-11-22_07-31-41
  done: false
  episode_len_mean: 53.94054054054054
  episode_media: {}
  episode_reward_max: 17.459999999999972
  episode_reward_mean: 5.101459459459463
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 185
  episodes_total: 105757
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0184590334873125
          entropy_coeff: 0.01
          kl: 0.014718694430950962
          policy_loss: -0.06770254790990804
          total_loss: 0.09257638839012818
          vf_explained_var: 0.9243140816688538
          vf_loss: 0.14693250015488335
    num_agent_steps_sampled: 5397750
    num_agent_steps_trained: 5397750
    num_steps_sampled: 5397750
    num_steps_trained: 53

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,600,304979,5397750,5.10146,17.46,-0.53,53.9405


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5407746
  custom_metrics: {}
  date: 2021-11-22_07-40-38
  done: false
  episode_len_mean: 54.5
  episode_media: {}
  episode_reward_max: 13.630000000000008
  episode_reward_mean: 5.122307692307697
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 182
  episodes_total: 105939
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.027273152846409
          entropy_coeff: 0.01
          kl: 0.014406627430335205
          policy_loss: -0.06860850171291984
          total_loss: 0.07342707194408163
          vf_explained_var: 0.9370869994163513
          vf_loss: 0.12948820572093983
    num_agent_steps_sampled: 5407746
    num_agent_steps_trained: 5407746
    num_steps_sampled: 5407746
    num_steps_trained: 5407746
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,601,305516,5407746,5.12231,13.63,-0.51,54.5




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5417742
  custom_metrics: {}
  date: 2021-11-22_07-49-57
  done: false
  episode_len_mean: 54.145161290322584
  episode_media: {}
  episode_reward_max: 15.49999999999999
  episode_reward_mean: 4.961612903225811
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 186
  episodes_total: 106125
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0205848572723357
          entropy_coeff: 0.01
          kl: 0.01534557207916642
          policy_loss: -0.07141521869623348
          total_loss: 0.08996324663333337
          vf_explained_var: 0.9304758906364441
          vf_loss: 0.14662518190747373
    num_agent_steps_sampled: 5417742
    num_agent_steps_trained: 5417742
    num_steps_sampled: 5417742
    num_steps_trained: 541

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,602,306075,5417742,4.96161,15.5,-0.5,54.1452




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5427738
  custom_metrics: {}
  date: 2021-11-22_07-59-10
  done: false
  episode_len_mean: 53.22872340425532
  episode_media: {}
  episode_reward_max: 13.660000000000005
  episode_reward_mean: 4.669308510638301
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 188
  episodes_total: 106313
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.064345845161193
          entropy_coeff: 0.01
          kl: 0.014183818601034327
          policy_loss: -0.07720824501243971
          total_loss: 0.0630577605684204
          vf_explained_var: 0.9288301467895508
          vf_loss: 0.12859695142108843
    num_agent_steps_sampled: 5427738
    num_agent_steps_trained: 5427738
    num_steps_sampled: 5427738
    num_steps_trained: 5427

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,603,306628,5427738,4.66931,13.66,-0.5,53.2287




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5437734
  custom_metrics: {}
  date: 2021-11-22_08-08-18
  done: false
  episode_len_mean: 54.08152173913044
  episode_media: {}
  episode_reward_max: 17.650000000000002
  episode_reward_mean: 4.79184782608696
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 184
  episodes_total: 106497
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.046438721384868
          entropy_coeff: 0.01
          kl: 0.01483815099199509
          policy_loss: -0.06868987886733799
          total_loss: 0.08707047881250683
          vf_explained_var: 0.9157671928405762
          vf_loss: 0.14242158115310333
    num_agent_steps_sampled: 5437734
    num_agent_steps_trained: 5437734
    num_steps_sampled: 5437734
    num_steps_trained: 54377

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,604,307176,5437734,4.79185,17.65,-0.53,54.0815


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5447730
  custom_metrics: {}
  date: 2021-11-22_08-17-14
  done: false
  episode_len_mean: 53.49197860962567
  episode_media: {}
  episode_reward_max: 17.419999999999977
  episode_reward_mean: 5.131818181818185
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 187
  episodes_total: 106684
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0324119756738823
          entropy_coeff: 0.01
          kl: 0.01523472036776269
          policy_loss: -0.06973653940647447
          total_loss: 0.09772676680957111
          vf_explained_var: 0.937073290348053
          vf_loss: 0.15308082677418344
    num_agent_steps_sampled: 5447730
    num_agent_steps_trained: 5447730
    num_steps_sampled: 5447730
    num_steps_trained: 544

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,605,307712,5447730,5.13182,17.42,-0.48,53.492




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5457726
  custom_metrics: {}
  date: 2021-11-22_08-26-40
  done: false
  episode_len_mean: 53.22872340425532
  episode_media: {}
  episode_reward_max: 15.660000000000005
  episode_reward_mean: 5.1004255319148974
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 188
  episodes_total: 106872
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0557572935240334
          entropy_coeff: 0.01
          kl: 0.01458705275776454
          policy_loss: -0.0704918373678624
          total_loss: 0.08042081950847511
          vf_explained_var: 0.9339661598205566
          vf_loss: 0.13823909836458453
    num_agent_steps_sampled: 5457726
    num_agent_steps_trained: 5457726
    num_steps_sampled: 5457726
    num_steps_trained: 545

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,606,308278,5457726,5.10043,15.66,-0.5,53.2287


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5467722
  custom_metrics: {}
  date: 2021-11-22_08-35-35
  done: false
  episode_len_mean: 53.295698924731184
  episode_media: {}
  episode_reward_max: 21.60999999999998
  episode_reward_mean: 5.261989247311832
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 186
  episodes_total: 107058
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0235125491418033
          entropy_coeff: 0.01
          kl: 0.014921481509498149
          policy_loss: -0.0689877435149973
          total_loss: 0.08533227144157335
          vf_explained_var: 0.9310736060142517
          vf_loss: 0.1405621389047725
    num_agent_steps_sampled: 5467722
    num_agent_steps_trained: 5467722
    num_steps_sampled: 5467722
    num_steps_trained: 5467

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,607,308813,5467722,5.26199,21.61,-0.53,53.2957




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5477718
  custom_metrics: {}
  date: 2021-11-22_08-44-41
  done: false
  episode_len_mean: 54.497297297297294
  episode_media: {}
  episode_reward_max: 17.63
  episode_reward_mean: 4.770648648648652
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 185
  episodes_total: 107243
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0363013741960487
          entropy_coeff: 0.01
          kl: 0.013615475412487116
          policy_loss: -0.06298168142674253
          total_loss: 0.070054648578539
          vf_explained_var: 0.9493324756622314
          vf_loss: 0.12238158718159191
    num_agent_steps_sampled: 5477718
    num_agent_steps_trained: 5477718
    num_steps_sampled: 5477718
    num_steps_trained: 5477718
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,608,309359,5477718,4.77065,17.63,-0.5,54.4973


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5487714
  custom_metrics: {}
  date: 2021-11-22_08-53-35
  done: false
  episode_len_mean: 53.62566844919786
  episode_media: {}
  episode_reward_max: 17.38999999999998
  episode_reward_mean: 5.226577540106956
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 187
  episodes_total: 107430
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0350410780753476
          entropy_coeff: 0.01
          kl: 0.014658741847039702
          policy_loss: -0.07075785138197874
          total_loss: 0.08399719313939609
          vf_explained_var: 0.9449756741523743
          vf_loss: 0.14171100874940107
    num_agent_steps_sampled: 5487714
    num_agent_steps_trained: 5487714
    num_steps_sampled: 5487714
    num_steps_trained: 548

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,609,309893,5487714,5.22658,17.39,-0.52,53.6257




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5497710
  custom_metrics: {}
  date: 2021-11-22_09-02-56
  done: false
  episode_len_mean: 53.3048128342246
  episode_media: {}
  episode_reward_max: 15.620000000000008
  episode_reward_mean: 5.346898395721929
  episode_reward_min: -0.37000000000000016
  episodes_this_iter: 187
  episodes_total: 107617
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0466857049838607
          entropy_coeff: 0.01
          kl: 0.014398658228977549
          policy_loss: -0.07366333280440586
          total_loss: 0.07569961505327202
          vf_explained_var: 0.9418825507164001
          vf_loss: 0.1370278612002693
    num_agent_steps_sampled: 5497710
    num_agent_steps_trained: 5497710
    num_steps_sampled: 5497710
    num_steps_trained: 549

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,610,310454,5497710,5.3469,15.62,-0.37,53.3048


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5507706
  custom_metrics: {}
  date: 2021-11-22_09-11-50
  done: false
  episode_len_mean: 53.18617021276596
  episode_media: {}
  episode_reward_max: 17.589999999999986
  episode_reward_mean: 5.509414893617026
  episode_reward_min: -0.4300000000000002
  episodes_this_iter: 188
  episodes_total: 107805
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0283674759558408
          entropy_coeff: 0.01
          kl: 0.015121195181722343
          policy_loss: -0.07483540449553606
          total_loss: 0.08830497669749816
          vf_explained_var: 0.9344483613967896
          vf_loss: 0.1489760821683325
    num_agent_steps_sampled: 5507706
    num_agent_steps_trained: 5507706
    num_steps_sampled: 5507706
    num_steps_trained: 550

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,611,310988,5507706,5.50941,17.59,-0.43,53.1862


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5517702
  custom_metrics: {}
  date: 2021-11-22_09-20-43
  done: false
  episode_len_mean: 54.40217391304348
  episode_media: {}
  episode_reward_max: 15.660000000000005
  episode_reward_mean: 5.2018478260869605
  episode_reward_min: -0.4200000000000002
  episodes_this_iter: 184
  episodes_total: 107989
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0339321898887435
          entropy_coeff: 0.01
          kl: 0.015493224001479242
          policy_loss: -0.07118085039939294
          total_loss: 0.09949853077614315
          vf_explained_var: 0.9304167628288269
          vf_loss: 0.15572320093703737
    num_agent_steps_sampled: 5517702
    num_agent_steps_trained: 5517702
    num_steps_sampled: 5517702
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,612,311521,5517702,5.20185,15.66,-0.42,54.4022




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5527698
  custom_metrics: {}
  date: 2021-11-22_09-29-52
  done: false
  episode_len_mean: 53.56216216216216
  episode_media: {}
  episode_reward_max: 15.590000000000007
  episode_reward_mean: 5.269189189189194
  episode_reward_min: -0.5700000000000003
  episodes_this_iter: 185
  episodes_total: 108174
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0289108605509303
          entropy_coeff: 0.01
          kl: 0.014269374554211707
          policy_loss: -0.07223573937155287
          total_loss: 0.06594677298171372
          vf_explained_var: 0.9423051476478577
          vf_loss: 0.12596419945716425
    num_agent_steps_sampled: 5527698
    num_agent_steps_trained: 5527698
    num_steps_sampled: 5527698
    num_steps_trained: 55

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,613,312070,5527698,5.26919,15.59,-0.57,53.5622




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5537694
  custom_metrics: {}
  date: 2021-11-22_09-39-16
  done: false
  episode_len_mean: 52.49738219895288
  episode_media: {}
  episode_reward_max: 19.60999999999998
  episode_reward_mean: 5.318848167539271
  episode_reward_min: -0.5300000000000005
  episodes_this_iter: 191
  episodes_total: 108365
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.019457296147404
          entropy_coeff: 0.01
          kl: 0.014555778444647394
          policy_loss: -0.0719682447468169
          total_loss: 0.07623833232263784
          vf_explained_var: 0.9418643116950989
          vf_loss: 0.1352412653557702
    num_agent_steps_sampled: 5537694
    num_agent_steps_trained: 5537694
    num_steps_sampled: 5537694
    num_steps_trained: 553769

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,614,312633,5537694,5.31885,19.61,-0.53,52.4974


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5547690
  custom_metrics: {}
  date: 2021-11-22_09-48-13
  done: false
  episode_len_mean: 53.34574468085106
  episode_media: {}
  episode_reward_max: 17.630000000000006
  episode_reward_mean: 5.114095744680855
  episode_reward_min: -0.47000000000000025
  episodes_this_iter: 188
  episodes_total: 108553
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.030236178038111
          entropy_coeff: 0.01
          kl: 0.014654211889326267
          policy_loss: -0.0678615728957661
          total_loss: 0.07795415303277903
          vf_explained_var: 0.9458229541778564
          vf_loss: 0.13273396138840515
    num_agent_steps_sampled: 5547690
    num_agent_steps_trained: 5547690
    num_steps_sampled: 5547690
    num_steps_trained: 554

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,615,313170,5547690,5.1141,17.63,-0.47,53.3457




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5557686
  custom_metrics: {}
  date: 2021-11-22_09-57-34
  done: false
  episode_len_mean: 53.483870967741936
  episode_media: {}
  episode_reward_max: 15.44000000000001
  episode_reward_mean: 4.760860215053767
  episode_reward_min: -0.6300000000000003
  episodes_this_iter: 186
  episodes_total: 108739
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0689973209038315
          entropy_coeff: 0.01
          kl: 0.014211749814968993
          policy_loss: -0.06524371761828018
          total_loss: 0.06952941276086516
          vf_explained_var: 0.9265385866165161
          vf_loss: 0.12308696145051529
    num_agent_steps_sampled: 5557686
    num_agent_steps_trained: 5557686
    num_steps_sampled: 5557686
    num_steps_trained: 55

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,616,313732,5557686,4.76086,15.44,-0.63,53.4839


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5567682
  custom_metrics: {}
  date: 2021-11-22_10-06-29
  done: false
  episode_len_mean: 53.74331550802139
  episode_media: {}
  episode_reward_max: 15.570000000000006
  episode_reward_mean: 5.264117647058828
  episode_reward_min: -0.5700000000000003
  episodes_this_iter: 187
  episodes_total: 108926
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0259234954554395
          entropy_coeff: 0.01
          kl: 0.01450759785873209
          policy_loss: -0.0699722961162099
          total_loss: 0.07810341786502702
          vf_explained_var: 0.9431240558624268
          vf_loss: 0.13528482703207786
    num_agent_steps_sampled: 5567682
    num_agent_steps_trained: 5567682
    num_steps_sampled: 5567682
    num_steps_trained: 5567

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,617,314267,5567682,5.26412,15.57,-0.57,53.7433




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5577678
  custom_metrics: {}
  date: 2021-11-22_10-15-37
  done: false
  episode_len_mean: 53.87567567567567
  episode_media: {}
  episode_reward_max: 13.580000000000007
  episode_reward_mean: 5.2916756756756795
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 185
  episodes_total: 109111
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0360011476589492
          entropy_coeff: 0.01
          kl: 0.014240027382459906
          policy_loss: -0.06883132238560856
          total_loss: 0.07165834971071953
          vf_explained_var: 0.9427934288978577
          vf_loss: 0.1284091200152165
    num_agent_steps_sampled: 5577678
    num_agent_steps_trained: 5577678
    num_steps_sampled: 5577678
    num_steps_trained: 55

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,618,314814,5577678,5.29168,13.58,-0.5,53.8757




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5587674
  custom_metrics: {}
  date: 2021-11-22_10-24-51
  done: false
  episode_len_mean: 53.711229946524064
  episode_media: {}
  episode_reward_max: 15.580000000000007
  episode_reward_mean: 5.3841711229946565
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 187
  episodes_total: 109298
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0204595423606504
          entropy_coeff: 0.01
          kl: 0.015454212054316095
          policy_loss: -0.0667406166084019
          total_loss: 0.08846235365826174
          vf_explained_var: 0.9280555844306946
          vf_loss: 0.1402009362267914
    num_agent_steps_sampled: 5587674
    num_agent_steps_trained: 5587674
    num_steps_sampled: 5587674
    num_steps_trained: 55

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,619,315369,5587674,5.38417,15.58,-0.54,53.7112




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5597670
  custom_metrics: {}
  date: 2021-11-22_10-33-57
  done: false
  episode_len_mean: 54.93922651933702
  episode_media: {}
  episode_reward_max: 15.600000000000007
  episode_reward_mean: 5.409613259668514
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 181
  episodes_total: 109479
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0132028547396144
          entropy_coeff: 0.01
          kl: 0.0148503126143704
          policy_loss: -0.06818616537165345
          total_loss: 0.08901686941473205
          vf_explained_var: 0.9477107524871826
          vf_loss: 0.14350419328856584
    num_agent_steps_sampled: 5597670
    num_agent_steps_trained: 5597670
    num_steps_sampled: 5597670
    num_steps_trained: 559

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,620,315914,5597670,5.40961,15.6,-0.48,54.9392




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5607666
  custom_metrics: {}
  date: 2021-11-22_10-43-07
  done: false
  episode_len_mean: 54.22162162162162
  episode_media: {}
  episode_reward_max: 15.700000000000005
  episode_reward_mean: 5.017189189189193
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 185
  episodes_total: 109664
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0187937355663883
          entropy_coeff: 0.01
          kl: 0.014205614757974487
          policy_loss: -0.06951712380803246
          total_loss: 0.06884473775793609
          vf_explained_var: 0.9296038746833801
          vf_loss: 0.1261876309685125
    num_agent_steps_sampled: 5607666
    num_agent_steps_trained: 5607666
    num_steps_sampled: 5607666
    num_steps_trained: 560

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,621,316465,5607666,5.01719,15.7,-0.52,54.2216




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5617662
  custom_metrics: {}
  date: 2021-11-22_10-52-15
  done: false
  episode_len_mean: 54.650273224043715
  episode_media: {}
  episode_reward_max: 15.680000000000005
  episode_reward_mean: 5.443989071038256
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 183
  episodes_total: 109847
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0156460904452693
          entropy_coeff: 0.01
          kl: 0.014750372803523551
          policy_loss: -0.06701750884820058
          total_loss: 0.0903905593652499
          vf_explained_var: 0.9430559873580933
          vf_loss: 0.1439613340049421
    num_agent_steps_sampled: 5617662
    num_agent_steps_trained: 5617662
    num_steps_sampled: 5617662
    num_steps_trained: 561

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,622,317012,5617662,5.44399,15.68,-0.51,54.6503




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5627658
  custom_metrics: {}
  date: 2021-11-22_11-01-24
  done: false
  episode_len_mean: 53.42780748663102
  episode_media: {}
  episode_reward_max: 19.709999999999987
  episode_reward_mean: 5.6645989304812865
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 187
  episodes_total: 110034
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.007639910585909
          entropy_coeff: 0.01
          kl: 0.014104014621843313
          policy_loss: -0.06636508281283104
          total_loss: 0.08370809897122997
          vf_explained_var: 0.9416435360908508
          vf_loss: 0.1380188720602819
    num_agent_steps_sampled: 5627658
    num_agent_steps_trained: 5627658
    num_steps_sampled: 5627658
    num_steps_trained: 562

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,623,317562,5627658,5.6646,19.71,-0.53,53.4278




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5637654
  custom_metrics: {}
  date: 2021-11-22_11-10-35
  done: false
  episode_len_mean: 53.564516129032256
  episode_media: {}
  episode_reward_max: 15.660000000000005
  episode_reward_mean: 5.103763440860219
  episode_reward_min: -0.5800000000000003
  episodes_this_iter: 186
  episodes_total: 110220
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0510344182152345
          entropy_coeff: 0.01
          kl: 0.014145555263098474
          policy_loss: -0.07168102174544759
          total_loss: 0.07041823235595221
          vf_explained_var: 0.9333083033561707
          vf_loss: 0.1303842543513069
    num_agent_steps_sampled: 5637654
    num_agent_steps_trained: 5637654
    num_steps_sampled: 5637654
    num_steps_trained: 56

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,624,318112,5637654,5.10376,15.66,-0.58,53.5645


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5647650
  custom_metrics: {}
  date: 2021-11-22_11-19-31
  done: false
  episode_len_mean: 53.340425531914896
  episode_media: {}
  episode_reward_max: 15.570000000000007
  episode_reward_mean: 5.16425531914894
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 188
  episodes_total: 110408
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.055309332733652
          entropy_coeff: 0.01
          kl: 0.01466209343312654
          policy_loss: -0.06896529878303626
          total_loss: 0.0907270353257982
          vf_explained_var: 0.9342118501663208
          vf_loss: 0.14684334485230585
    num_agent_steps_sampled: 5647650
    num_agent_steps_trained: 5647650
    num_steps_sampled: 5647650
    num_steps_trained: 56476

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,625,318649,5647650,5.16426,15.57,-0.5,53.3404




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5657646
  custom_metrics: {}
  date: 2021-11-22_11-28-42
  done: false
  episode_len_mean: 53.82702702702703
  episode_media: {}
  episode_reward_max: 15.600000000000007
  episode_reward_mean: 5.140918918918923
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 185
  episodes_total: 110593
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.022221385068204
          entropy_coeff: 0.01
          kl: 0.014153416125631781
          policy_loss: -0.07186874386523955
          total_loss: 0.08135208240790337
          vf_explained_var: 0.9394128322601318
          vf_loss: 0.14119978609324685
    num_agent_steps_sampled: 5657646
    num_agent_steps_trained: 5657646
    num_steps_sampled: 5657646
    num_steps_trained: 565

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,626,319199,5657646,5.14092,15.6,-0.54,53.827




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5667642
  custom_metrics: {}
  date: 2021-11-22_11-37-46
  done: false
  episode_len_mean: 54.622950819672134
  episode_media: {}
  episode_reward_max: 19.559999999999995
  episode_reward_mean: 5.565901639344267
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 183
  episodes_total: 110776
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0209379694069245
          entropy_coeff: 0.01
          kl: 0.015137634334561813
          policy_loss: -0.06981690260245488
          total_loss: 0.09143370963681655
          vf_explained_var: 0.9421535730361938
          vf_loss: 0.14697456769710668
    num_agent_steps_sampled: 5667642
    num_agent_steps_trained: 5667642
    num_steps_sampled: 5667642
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,627,319743,5667642,5.5659,19.56,-0.53,54.623




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5677638
  custom_metrics: {}
  date: 2021-11-22_11-47-09
  done: false
  episode_len_mean: 53.44919786096256
  episode_media: {}
  episode_reward_max: 17.73
  episode_reward_mean: 4.998449197860967
  episode_reward_min: -0.6400000000000003
  episodes_this_iter: 187
  episodes_total: 110963
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0484623287815644
          entropy_coeff: 0.01
          kl: 0.014184546723640773
          policy_loss: -0.06710270604698561
          total_loss: 0.0708659978407634
          vf_explained_var: 0.9343231320381165
          vf_loss: 0.1261391550862608
    num_agent_steps_sampled: 5677638
    num_agent_steps_trained: 5677638
    num_steps_sampled: 5677638
    num_steps_trained: 5677638
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,628,320307,5677638,4.99845,17.73,-0.64,53.4492




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5687634
  custom_metrics: {}
  date: 2021-11-22_11-56-23
  done: false
  episode_len_mean: 53.715053763440864
  episode_media: {}
  episode_reward_max: 15.610000000000007
  episode_reward_mean: 5.087311827956993
  episode_reward_min: -0.5600000000000003
  episodes_this_iter: 186
  episodes_total: 111149
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.07068598323079
          entropy_coeff: 0.01
          kl: 0.014134942009888993
          policy_loss: -0.07107662363670977
          total_loss: 0.06350574944163713
          vf_explained_var: 0.9457156658172607
          vf_loss: 0.12308806689777767
    num_agent_steps_sampled: 5687634
    num_agent_steps_trained: 5687634
    num_steps_sampled: 5687634
    num_steps_trained: 568

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,629,320860,5687634,5.08731,15.61,-0.56,53.7151




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5697630
  custom_metrics: {}
  date: 2021-11-22_12-05-30
  done: false
  episode_len_mean: 53.32804232804233
  episode_media: {}
  episode_reward_max: 13.650000000000006
  episode_reward_mean: 4.898835978835983
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 189
  episodes_total: 111338
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0586105678215563
          entropy_coeff: 0.01
          kl: 0.016009641641656636
          policy_loss: -0.06972239875010372
          total_loss: 0.0957878568420148
          vf_explained_var: 0.9020230174064636
          vf_loss: 0.14962439635828376
    num_agent_steps_sampled: 5697630
    num_agent_steps_trained: 5697630
    num_steps_sampled: 5697630
    num_steps_trained: 569

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,630,321408,5697630,4.89884,13.65,-0.53,53.328




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5707626
  custom_metrics: {}
  date: 2021-11-22_12-14-34
  done: false
  episode_len_mean: 53.76086956521739
  episode_media: {}
  episode_reward_max: 15.650000000000004
  episode_reward_mean: 5.1813043478260905
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 184
  episodes_total: 111522
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0546873615927486
          entropy_coeff: 0.01
          kl: 0.014697528952971297
          policy_loss: -0.07082512186487541
          total_loss: 0.0761969726915763
          vf_explained_var: 0.9422051310539246
          vf_loss: 0.13408615878936517
    num_agent_steps_sampled: 5707626
    num_agent_steps_trained: 5707626
    num_steps_sampled: 5707626
    num_steps_trained: 57

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,631,321951,5707626,5.1813,15.65,-0.54,53.7609


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5717622
  custom_metrics: {}
  date: 2021-11-22_12-23-27
  done: false
  episode_len_mean: 53.90909090909091
  episode_media: {}
  episode_reward_max: 19.58999999999999
  episode_reward_mean: 5.573957219251342
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 187
  episodes_total: 111709
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.031304210232922
          entropy_coeff: 0.01
          kl: 0.014939853747545724
          policy_loss: -0.06969654389481493
          total_loss: 0.09761323241232221
          vf_explained_var: 0.9360416531562805
          vf_loss: 0.15358796305701988
    num_agent_steps_sampled: 5717622
    num_agent_steps_trained: 5717622
    num_steps_sampled: 5717622
    num_steps_trained: 5717

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,632,322484,5717622,5.57396,19.59,-0.54,53.9091




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5727618
  custom_metrics: {}
  date: 2021-11-22_12-32-33
  done: false
  episode_len_mean: 53.45989304812834
  episode_media: {}
  episode_reward_max: 17.42000000000001
  episode_reward_mean: 5.394866310160432
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 187
  episodes_total: 111896
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0440923068178707
          entropy_coeff: 0.01
          kl: 0.014239969505140589
          policy_loss: -0.07177902263020627
          total_loss: 0.07834613110547409
          vf_explained_var: 0.9410803318023682
          vf_loss: 0.1381256437692118
    num_agent_steps_sampled: 5727618
    num_agent_steps_trained: 5727618
    num_steps_sampled: 5727618
    num_steps_trained: 5727

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,633,323030,5727618,5.39487,17.42,-0.52,53.4599




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5737614
  custom_metrics: {}
  date: 2021-11-22_12-41-56
  done: false
  episode_len_mean: 52.24083769633508
  episode_media: {}
  episode_reward_max: 15.520000000000008
  episode_reward_mean: 4.928219895287962
  episode_reward_min: -0.5500000000000003
  episodes_this_iter: 191
  episodes_total: 112087
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.060124005275558
          entropy_coeff: 0.01
          kl: 0.014762086289481403
          policy_loss: -0.06911826990372394
          total_loss: 0.08667335985281893
          vf_explained_var: 0.9290012717247009
          vf_loss: 0.14276299077791188
    num_agent_steps_sampled: 5737614
    num_agent_steps_trained: 5737614
    num_steps_sampled: 5737614
    num_steps_trained: 573

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,634,323592,5737614,4.92822,15.52,-0.55,52.2408


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5747610
  custom_metrics: {}
  date: 2021-11-22_12-50-46
  done: false
  episode_len_mean: 53.60215053763441
  episode_media: {}
  episode_reward_max: 19.46999999999998
  episode_reward_mean: 5.319946236559144
  episode_reward_min: -0.6500000000000004
  episodes_this_iter: 186
  episodes_total: 112273
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0326806343942283
          entropy_coeff: 0.01
          kl: 0.014699189857102389
          policy_loss: -0.0700087514384561
          total_loss: 0.09034569188499306
          vf_explained_var: 0.9118105173110962
          vf_loss: 0.1471946560020323
    num_agent_steps_sampled: 5747610
    num_agent_steps_trained: 5747610
    num_steps_sampled: 5747610
    num_steps_trained: 57476

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,635,324123,5747610,5.31995,19.47,-0.65,53.6022


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5757606
  custom_metrics: {}
  date: 2021-11-22_12-59-40
  done: false
  episode_len_mean: 54.07027027027027
  episode_media: {}
  episode_reward_max: 15.600000000000007
  episode_reward_mean: 4.9091351351351395
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 185
  episodes_total: 112458
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0645296943474967
          entropy_coeff: 0.01
          kl: 0.014517610338668142
          policy_loss: -0.07059492946058431
          total_loss: 0.08322471263472926
          vf_explained_var: 0.9308857321739197
          vf_loss: 0.14139200796936097
    num_agent_steps_sampled: 5757606
    num_agent_steps_trained: 5757606
    num_steps_sampled: 5757606
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,636,324657,5757606,4.90914,15.6,-0.51,54.0703




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5767602
  custom_metrics: {}
  date: 2021-11-22_13-08-45
  done: false
  episode_len_mean: 54.358695652173914
  episode_media: {}
  episode_reward_max: 15.610000000000007
  episode_reward_mean: 4.915217391304352
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 184
  episodes_total: 112642
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0707705536520624
          entropy_coeff: 0.01
          kl: 0.014370865645489084
          policy_loss: -0.07048014069017129
          total_loss: 0.06864415643952793
          vf_explained_var: 0.9364398717880249
          vf_loss: 0.12709337297114487
    num_agent_steps_sampled: 5767602
    num_agent_steps_trained: 5767602
    num_steps_sampled: 5767602
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,637,325202,5767602,4.91522,15.61,-0.48,54.3587




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5777598
  custom_metrics: {}
  date: 2021-11-22_13-18-03
  done: false
  episode_len_mean: 54.037837837837834
  episode_media: {}
  episode_reward_max: 15.600000000000005
  episode_reward_mean: 5.236864864864869
  episode_reward_min: -0.5500000000000003
  episodes_this_iter: 185
  episodes_total: 112827
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.044213943404845
          entropy_coeff: 0.01
          kl: 0.014481446160547142
          policy_loss: -0.07344497907720626
          total_loss: 0.0698556049296906
          vf_explained_var: 0.9371852874755859
          vf_loss: 0.1307521779849437
    num_agent_steps_sampled: 5777598
    num_agent_steps_trained: 5777598
    num_steps_sampled: 5777598
    num_steps_trained: 5777

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,638,325760,5777598,5.23686,15.6,-0.55,54.0378




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5787594
  custom_metrics: {}
  date: 2021-11-22_13-27-28
  done: false
  episode_len_mean: 53.31550802139037
  episode_media: {}
  episode_reward_max: 15.550000000000008
  episode_reward_mean: 5.207540106951876
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 187
  episodes_total: 113014
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0802018348711084
          entropy_coeff: 0.01
          kl: 0.014746220133164178
          policy_loss: -0.07213711098613941
          total_loss: 0.07383041879204329
          vf_explained_var: 0.9271757006645203
          vf_loss: 0.133175814406288
    num_agent_steps_sampled: 5787594
    num_agent_steps_trained: 5787594
    num_steps_sampled: 5787594
    num_steps_trained: 5787

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,639,326325,5787594,5.20754,15.55,-0.53,53.3155


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5797590
  custom_metrics: {}
  date: 2021-11-22_13-36-22
  done: false
  episode_len_mean: 53.29100529100529
  episode_media: {}
  episode_reward_max: 17.580000000000005
  episode_reward_mean: 5.196772486772491
  episode_reward_min: -0.45000000000000023
  episodes_this_iter: 189
  episodes_total: 113203
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.078857628887437
          entropy_coeff: 0.01
          kl: 0.014608663955567008
          policy_loss: -0.06367091410251027
          total_loss: 0.08518996936962205
          vf_explained_var: 0.9324466586112976
          vf_loss: 0.1363690966347524
    num_agent_steps_sampled: 5797590
    num_agent_steps_trained: 5797590
    num_steps_sampled: 5797590
    num_steps_trained: 579

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,640,326859,5797590,5.19677,17.58,-0.45,53.291




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5807586
  custom_metrics: {}
  date: 2021-11-22_13-45-30
  done: false
  episode_len_mean: 53.18817204301075
  episode_media: {}
  episode_reward_max: 17.579999999999988
  episode_reward_mean: 5.150645161290327
  episode_reward_min: -0.6500000000000004
  episodes_this_iter: 186
  episodes_total: 113389
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.051083304628311
          entropy_coeff: 0.01
          kl: 0.014672607417047996
          policy_loss: -0.0640330535692164
          total_loss: 0.09336216807842285
          vf_explained_var: 0.9289901852607727
          vf_loss: 0.144480019003194
    num_agent_steps_sampled: 5807586
    num_agent_steps_trained: 5807586
    num_steps_sampled: 5807586
    num_steps_trained: 580758

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,641,327407,5807586,5.15065,17.58,-0.65,53.1882




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5817582
  custom_metrics: {}
  date: 2021-11-22_13-54-35
  done: false
  episode_len_mean: 53.53191489361702
  episode_media: {}
  episode_reward_max: 13.610000000000007
  episode_reward_mean: 5.351702127659578
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 188
  episodes_total: 113577
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0614491538350364
          entropy_coeff: 0.01
          kl: 0.014772933633116237
          policy_loss: -0.06824270391215756
          total_loss: 0.09138922318361803
          vf_explained_var: 0.9476010203361511
          vf_loss: 0.14659182752536826
    num_agent_steps_sampled: 5817582
    num_agent_steps_trained: 5817582
    num_steps_sampled: 5817582
    num_steps_trained: 58

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,642,327952,5817582,5.3517,13.61,-0.54,53.5319




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5827578
  custom_metrics: {}
  date: 2021-11-22_14-03-41
  done: false
  episode_len_mean: 53.42780748663102
  episode_media: {}
  episode_reward_max: 17.53000000000001
  episode_reward_mean: 5.156203208556153
  episode_reward_min: -0.4400000000000002
  episodes_this_iter: 187
  episodes_total: 113764
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.082246882585158
          entropy_coeff: 0.01
          kl: 0.01439145079026499
          policy_loss: -0.07332694528642064
          total_loss: 0.0631520707272443
          vf_explained_var: 0.9387286305427551
          vf_loss: 0.12451596116557358
    num_agent_steps_sampled: 5827578
    num_agent_steps_trained: 5827578
    num_steps_sampled: 5827578
    num_steps_trained: 582757

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,643,328498,5827578,5.1562,17.53,-0.44,53.4278




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5837574
  custom_metrics: {}
  date: 2021-11-22_14-12-49
  done: false
  episode_len_mean: 52.857142857142854
  episode_media: {}
  episode_reward_max: 15.570000000000007
  episode_reward_mean: 5.574920634920639
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 189
  episodes_total: 113953
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0528514152071082
          entropy_coeff: 0.01
          kl: 0.015026263761694705
          policy_loss: -0.06932870123619433
          total_loss: 0.08167062579946745
          vf_explained_var: 0.9373794198036194
          vf_loss: 0.13729613333506638
    num_agent_steps_sampled: 5837574
    num_agent_steps_trained: 5837574
    num_steps_sampled: 5837574
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,644,329045,5837574,5.57492,15.57,-0.52,52.8571




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5847570
  custom_metrics: {}
  date: 2021-11-22_14-21-55
  done: false
  episode_len_mean: 53.39572192513369
  episode_media: {}
  episode_reward_max: 17.380000000000003
  episode_reward_mean: 4.968074866310165
  episode_reward_min: -0.5900000000000003
  episodes_this_iter: 187
  episodes_total: 114140
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0503187716964737
          entropy_coeff: 0.01
          kl: 0.014803501904565818
          policy_loss: -0.07311417542581233
          total_loss: 0.06888060736971703
          vf_explained_var: 0.9387772083282471
          vf_loss: 0.12877374190578134
    num_agent_steps_sampled: 5847570
    num_agent_steps_trained: 5847570
    num_steps_sampled: 5847570
    num_steps_trained: 58

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,645,329592,5847570,4.96807,17.38,-0.59,53.3957




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5857566
  custom_metrics: {}
  date: 2021-11-22_14-31-04
  done: false
  episode_len_mean: 53.48663101604278
  episode_media: {}
  episode_reward_max: 15.47000000000001
  episode_reward_mean: 5.138930481283427
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 187
  episodes_total: 114327
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0844428091642846
          entropy_coeff: 0.01
          kl: 0.014568479781745077
          policy_loss: -0.07127328626954978
          total_loss: 0.06617512951111952
          vf_explained_var: 0.9442251324653625
          vf_loss: 0.12510402441542806
    num_agent_steps_sampled: 5857566
    num_agent_steps_trained: 5857566
    num_steps_sampled: 5857566
    num_steps_trained: 58

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,646,330140,5857566,5.13893,15.47,-0.49,53.4866




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5867562
  custom_metrics: {}
  date: 2021-11-22_14-40-12
  done: false
  episode_len_mean: 53.34574468085106
  episode_media: {}
  episode_reward_max: 15.510000000000009
  episode_reward_mean: 5.12202127659575
  episode_reward_min: -0.4100000000000002
  episodes_this_iter: 188
  episodes_total: 114515
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.06207298650799
          entropy_coeff: 0.01
          kl: 0.015020257603292253
          policy_loss: -0.07087512940171478
          total_loss: 0.07749730882819403
          vf_explained_var: 0.9476809501647949
          vf_loss: 0.13477514264011103
    num_agent_steps_sampled: 5867562
    num_agent_steps_trained: 5867562
    num_steps_sampled: 5867562
    num_steps_trained: 58675

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,647,330688,5867562,5.12202,15.51,-0.41,53.3457




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5877558
  custom_metrics: {}
  date: 2021-11-22_14-49-29
  done: false
  episode_len_mean: 54.108695652173914
  episode_media: {}
  episode_reward_max: 17.569999999999993
  episode_reward_mean: 5.1511413043478305
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 184
  episodes_total: 114699
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.061803642980545
          entropy_coeff: 0.01
          kl: 0.014635073111698616
          policy_loss: -0.07021942718449399
          total_loss: 0.0662292999057077
          vf_explained_var: 0.9224540591239929
          vf_loss: 0.12372623640561409
    num_agent_steps_sampled: 5877558
    num_agent_steps_trained: 5877558
    num_steps_sampled: 5877558
    num_steps_trained: 58

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,648,331245,5877558,5.15114,17.57,-0.51,54.1087




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5887554
  custom_metrics: {}
  date: 2021-11-22_14-58-33
  done: false
  episode_len_mean: 53.33510638297872
  episode_media: {}
  episode_reward_max: 15.640000000000006
  episode_reward_mean: 5.410319148936176
  episode_reward_min: -0.4900000000000002
  episodes_this_iter: 188
  episodes_total: 114887
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.044160015156949
          entropy_coeff: 0.01
          kl: 0.014714441968525218
          policy_loss: -0.07050545046903166
          total_loss: 0.07375341264081604
          vf_explained_var: 0.9425032138824463
          vf_loss: 0.13117912370350632
    num_agent_steps_sampled: 5887554
    num_agent_steps_trained: 5887554
    num_steps_sampled: 5887554
    num_steps_trained: 588

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,649,331789,5887554,5.41032,15.64,-0.49,53.3351




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5897550
  custom_metrics: {}
  date: 2021-11-22_15-07-39
  done: false
  episode_len_mean: 53.32446808510638
  episode_media: {}
  episode_reward_max: 15.530000000000008
  episode_reward_mean: 4.714468085106386
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 188
  episodes_total: 115075
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.093902541188351
          entropy_coeff: 0.01
          kl: 0.014595602867384328
          policy_loss: -0.07012477206532372
          total_loss: 0.06492789493204182
          vf_explained_var: 0.9350862503051758
          vf_loss: 0.12274108387597174
    num_agent_steps_sampled: 5897550
    num_agent_steps_trained: 5897550
    num_steps_sampled: 5897550
    num_steps_trained: 58

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,650,332336,5897550,4.71447,15.53,-0.49,53.3245


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5907546
  custom_metrics: {}
  date: 2021-11-22_15-16-36
  done: false
  episode_len_mean: 52.755319148936174
  episode_media: {}
  episode_reward_max: 17.649999999999988
  episode_reward_mean: 5.000425531914898
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 188
  episodes_total: 115263
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0695660971494085
          entropy_coeff: 0.01
          kl: 0.014514591564959847
          policy_loss: -0.07250082939639667
          total_loss: 0.06595945619450519
          vf_explained_var: 0.9145945310592651
          vf_loss: 0.12608989126350073
    num_agent_steps_sampled: 5907546
    num_agent_steps_trained: 5907546
    num_steps_sampled: 5907546
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,651,332872,5907546,5.00043,17.65,-0.51,52.7553




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5917542
  custom_metrics: {}
  date: 2021-11-22_15-25-53
  done: false
  episode_len_mean: 53.70967741935484
  episode_media: {}
  episode_reward_max: 17.70999999999999
  episode_reward_mean: 5.015913978494629
  episode_reward_min: -0.5700000000000003
  episodes_this_iter: 186
  episodes_total: 115449
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.075002136646983
          entropy_coeff: 0.01
          kl: 0.01494236150191768
          policy_loss: -0.06968459666412778
          total_loss: 0.07816935969985789
          vf_explained_var: 0.9204039573669434
          vf_loss: 0.13456340891367055
    num_agent_steps_sampled: 5917542
    num_agent_steps_trained: 5917542
    num_steps_sampled: 5917542
    num_steps_trained: 59175

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,652,333430,5917542,5.01591,17.71,-0.57,53.7097




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5927538
  custom_metrics: {}
  date: 2021-11-22_15-35-14
  done: false
  episode_len_mean: 53.9144385026738
  episode_media: {}
  episode_reward_max: 17.699999999999996
  episode_reward_mean: 5.46877005347594
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 187
  episodes_total: 115636
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.065086731087252
          entropy_coeff: 0.01
          kl: 0.01518973819547063
          policy_loss: -0.06741425976680515
          total_loss: 0.08836539406229532
          vf_explained_var: 0.9458716511726379
          vf_loss: 0.14182639793244126
    num_agent_steps_sampled: 5927538
    num_agent_steps_trained: 5927538
    num_steps_sampled: 5927538
    num_steps_trained: 592753

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,653,333991,5927538,5.46877,17.7,-0.5,53.9144


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5937534
  custom_metrics: {}
  date: 2021-11-22_15-44-12
  done: false
  episode_len_mean: 53.36363636363637
  episode_media: {}
  episode_reward_max: 15.660000000000005
  episode_reward_mean: 5.179679144385031
  episode_reward_min: -0.47000000000000025
  episodes_this_iter: 187
  episodes_total: 115823
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.068321438797985
          entropy_coeff: 0.01
          kl: 0.014086869961140147
          policy_loss: -0.07251523806207916
          total_loss: 0.057982326048509406
          vf_explained_var: 0.9452546238899231
          vf_loss: 0.1190891274254784
    num_agent_steps_sampled: 5937534
    num_agent_steps_trained: 5937534
    num_steps_sampled: 5937534
    num_steps_trained: 59

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,654,334529,5937534,5.17968,15.66,-0.47,53.3636


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5947530
  custom_metrics: {}
  date: 2021-11-22_15-53-11
  done: false
  episode_len_mean: 52.75132275132275
  episode_media: {}
  episode_reward_max: 13.520000000000007
  episode_reward_mean: 5.139153439153442
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 189
  episodes_total: 116012
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0700680453853915
          entropy_coeff: 0.01
          kl: 0.01473445257999453
          policy_loss: -0.07079976189762752
          total_loss: 0.06893858238094706
          vf_explained_var: 0.9421616196632385
          vf_loss: 0.1268720980822464
    num_agent_steps_sampled: 5947530
    num_agent_steps_trained: 5947530
    num_steps_sampled: 5947530
    num_steps_trained: 5947

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,655,335067,5947530,5.13915,13.52,-0.51,52.7513




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5957526
  custom_metrics: {}
  date: 2021-11-22_16-02-43
  done: false
  episode_len_mean: 51.72538860103627
  episode_media: {}
  episode_reward_max: 15.630000000000008
  episode_reward_mean: 5.169170984455962
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 193
  episodes_total: 116205
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0852166741009217
          entropy_coeff: 0.01
          kl: 0.015008508826009482
          policy_loss: -0.07133515223655729
          total_loss: 0.06636290902052244
          vf_explained_var: 0.9381973147392273
          vf_loss: 0.1243589662996319
    num_agent_steps_sampled: 5957526
    num_agent_steps_trained: 5957526
    num_steps_sampled: 5957526
    num_steps_trained: 59

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,656,335640,5957526,5.16917,15.63,-0.49,51.7254




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5967522
  custom_metrics: {}
  date: 2021-11-22_16-11-49
  done: false
  episode_len_mean: 52.642105263157895
  episode_media: {}
  episode_reward_max: 13.640000000000006
  episode_reward_mean: 5.293789473684215
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 190
  episodes_total: 116395
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.054576787340593
          entropy_coeff: 0.01
          kl: 0.015760299179668175
          policy_loss: -0.06638647937204761
          total_loss: 0.087174392636356
          vf_explained_var: 0.9385548233985901
          vf_loss: 0.13820270680968302
    num_agent_steps_sampled: 5967522
    num_agent_steps_trained: 5967522
    num_steps_sampled: 5967522
    num_steps_trained: 596

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,657,336185,5967522,5.29379,13.64,-0.49,52.6421


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5977518
  custom_metrics: {}
  date: 2021-11-22_16-20-46
  done: false
  episode_len_mean: 51.9375
  episode_media: {}
  episode_reward_max: 13.630000000000006
  episode_reward_mean: 5.165520833333338
  episode_reward_min: -0.5900000000000003
  episodes_this_iter: 192
  episodes_total: 116587
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0695187344129784
          entropy_coeff: 0.01
          kl: 0.014511964539438805
          policy_loss: -0.06976058529355733
          total_loss: 0.06477366510866064
          vf_explained_var: 0.9419015049934387
          vf_loss: 0.12216936765878225
    num_agent_steps_sampled: 5977518
    num_agent_steps_trained: 5977518
    num_steps_sampled: 5977518
    num_steps_trained: 5977518
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,658,336722,5977518,5.16552,13.63,-0.59,51.9375




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5987514
  custom_metrics: {}
  date: 2021-11-22_16-30-17
  done: false
  episode_len_mean: 52.58638743455497
  episode_media: {}
  episode_reward_max: 15.600000000000007
  episode_reward_mean: 4.971099476439794
  episode_reward_min: -0.6800000000000004
  episodes_this_iter: 191
  episodes_total: 116778
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0457588323866984
          entropy_coeff: 0.01
          kl: 0.015142494967019802
          policy_loss: -0.06976458057113953
          total_loss: 0.07699660582599889
          vf_explained_var: 0.9319100379943848
          vf_loss: 0.13272227718565896
    num_agent_steps_sampled: 5987514
    num_agent_steps_trained: 5987514
    num_steps_sampled: 5987514
    num_steps_trained: 59

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,659,337293,5987514,4.9711,15.6,-0.68,52.5864


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 5997510
  custom_metrics: {}
  date: 2021-11-22_16-39-12
  done: false
  episode_len_mean: 52.60526315789474
  episode_media: {}
  episode_reward_max: 15.769999999999996
  episode_reward_mean: 5.226421052631584
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 190
  episodes_total: 116968
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0131938160902045
          entropy_coeff: 0.01
          kl: 0.015611705728600008
          policy_loss: -0.07056169195370256
          total_loss: 0.08506807627002062
          vf_explained_var: 0.9280446171760559
          vf_loss: 0.1401962870244782
    num_agent_steps_sampled: 5997510
    num_agent_steps_trained: 5997510
    num_steps_sampled: 5997510
    num_steps_trained: 59

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,660,337828,5997510,5.22642,15.77,-0.48,52.6053




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6007506
  custom_metrics: {}
  date: 2021-11-22_16-48-19
  done: false
  episode_len_mean: 53.22994652406417
  episode_media: {}
  episode_reward_max: 17.569999999999975
  episode_reward_mean: 5.0105347593582925
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 187
  episodes_total: 117155
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0241402749555655
          entropy_coeff: 0.01
          kl: 0.01644539771257621
          policy_loss: -0.065746110233912
          total_loss: 0.09170421181214729
          vf_explained_var: 0.9349476099014282
          vf_loss: 0.1402270520641749
    num_agent_steps_sampled: 6007506
    num_agent_steps_trained: 6007506
    num_steps_sampled: 6007506
    num_steps_trained: 60075

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,661,338375,6007506,5.01053,17.57,-0.51,53.2299




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6017502
  custom_metrics: {}
  date: 2021-11-22_16-57-29
  done: false
  episode_len_mean: 52.74074074074074
  episode_media: {}
  episode_reward_max: 15.560000000000008
  episode_reward_mean: 5.256825396825401
  episode_reward_min: -0.3900000000000002
  episodes_this_iter: 189
  episodes_total: 117344
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0309103561931825
          entropy_coeff: 0.01
          kl: 0.015220019125468847
          policy_loss: -0.06735249589620877
          total_loss: 0.08672154592029867
          vf_explained_var: 0.9409132599830627
          vf_loss: 0.1397100383055819
    num_agent_steps_sampled: 6017502
    num_agent_steps_trained: 6017502
    num_steps_sampled: 6017502
    num_steps_trained: 601

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,662,338925,6017502,5.25683,15.56,-0.39,52.7407




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6027498
  custom_metrics: {}
  date: 2021-11-22_17-06-48
  done: false
  episode_len_mean: 52.49738219895288
  episode_media: {}
  episode_reward_max: 19.689999999999994
  episode_reward_mean: 5.066858638743459
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 191
  episodes_total: 117535
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.042133589896811
          entropy_coeff: 0.01
          kl: 0.015324487308516157
          policy_loss: -0.06846440576432888
          total_loss: 0.0889748728868646
          vf_explained_var: 0.9466001987457275
          vf_loss: 0.1429495158596862
    num_agent_steps_sampled: 6027498
    num_agent_steps_trained: 6027498
    num_steps_sampled: 6027498
    num_steps_trained: 60274

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,663,339484,6027498,5.06686,19.69,-0.51,52.4974




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6037494
  custom_metrics: {}
  date: 2021-11-22_17-15-53
  done: false
  episode_len_mean: 52.584210526315786
  episode_media: {}
  episode_reward_max: 21.709999999999983
  episode_reward_mean: 5.2648421052631615
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 190
  episodes_total: 117725
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0541944934900505
          entropy_coeff: 0.01
          kl: 0.015903458102786955
          policy_loss: -0.06572434778338329
          total_loss: 0.10520183919671183
          vf_explained_var: 0.9404842853546143
          vf_loss: 0.15523806554601943
    num_agent_steps_sampled: 6037494
    num_agent_steps_trained: 6037494
    num_steps_sampled: 6037494
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,664,340029,6037494,5.26484,21.71,-0.51,52.5842


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6047490
  custom_metrics: {}
  date: 2021-11-22_17-24-50
  done: false
  episode_len_mean: 52.397905759162306
  episode_media: {}
  episode_reward_max: 19.509999999999977
  episode_reward_mean: 5.298272251308905
  episode_reward_min: -0.38000000000000017
  episodes_this_iter: 191
  episodes_total: 117916
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.029645023910875
          entropy_coeff: 0.01
          kl: 0.01495006131231702
          policy_loss: -0.0726321384424274
          total_loss: 0.07610651761637544
          vf_explained_var: 0.9578936696052551
          vf_loss: 0.1349769965602444
    num_agent_steps_sampled: 6047490
    num_agent_steps_trained: 6047490
    num_steps_sampled: 6047490
    num_steps_trained: 6047

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,665,340566,6047490,5.29827,19.51,-0.38,52.3979




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6057486
  custom_metrics: {}
  date: 2021-11-22_17-34-16
  done: false
  episode_len_mean: 51.984455958549226
  episode_media: {}
  episode_reward_max: 15.640000000000006
  episode_reward_mean: 5.003886010362698
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 193
  episodes_total: 118109
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.070487549027286
          entropy_coeff: 0.01
          kl: 0.015114228532103782
          policy_loss: -0.06659050837569153
          total_loss: 0.0854641343127159
          vf_explained_var: 0.9459447264671326
          vf_loss: 0.13832741530722448
    num_agent_steps_sampled: 6057486
    num_agent_steps_trained: 6057486
    num_steps_sampled: 6057486
    num_steps_trained: 605

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,666,341132,6057486,5.00389,15.64,-0.51,51.9845




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6067482
  custom_metrics: {}
  date: 2021-11-22_17-43-28
  done: false
  episode_len_mean: 51.84455958549223
  episode_media: {}
  episode_reward_max: 15.640000000000006
  episode_reward_mean: 5.132849740932646
  episode_reward_min: -0.4400000000000002
  episodes_this_iter: 193
  episodes_total: 118302
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.059280160416561
          entropy_coeff: 0.01
          kl: 0.014839287205398033
          policy_loss: -0.07021774447557216
          total_loss: 0.06821607740769332
          vf_explained_var: 0.9423972964286804
          vf_loss: 0.1252208712692167
    num_agent_steps_sampled: 6067482
    num_agent_steps_trained: 6067482
    num_steps_sampled: 6067482
    num_steps_trained: 6067

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,667,341684,6067482,5.13285,15.64,-0.44,51.8446


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6077478
  custom_metrics: {}
  date: 2021-11-22_17-52-22
  done: false
  episode_len_mean: 52.68783068783069
  episode_media: {}
  episode_reward_max: 15.610000000000007
  episode_reward_mean: 4.858042328042331
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 189
  episodes_total: 118491
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.076492182222236
          entropy_coeff: 0.01
          kl: 0.015138760417117212
          policy_loss: -0.0652843018330542
          total_loss: 0.07191137602878113
          vf_explained_var: 0.9254948496818542
          vf_loss: 0.12347260966670726
    num_agent_steps_sampled: 6077478
    num_agent_steps_trained: 6077478
    num_steps_sampled: 6077478
    num_steps_trained: 6077

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,668,342218,6077478,4.85804,15.61,-0.53,52.6878


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6087474
  custom_metrics: {}
  date: 2021-11-22_18-01-19
  done: false
  episode_len_mean: 51.76165803108808
  episode_media: {}
  episode_reward_max: 15.650000000000006
  episode_reward_mean: 4.849430051813475
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 193
  episodes_total: 118684
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.064583537114193
          entropy_coeff: 0.01
          kl: 0.014953541060883447
          policy_loss: -0.06979356750424372
          total_loss: 0.07595041766792539
          vf_explained_var: 0.9418963193893433
          vf_loss: 0.132323783715854
    num_agent_steps_sampled: 6087474
    num_agent_steps_trained: 6087474
    num_steps_sampled: 6087474
    num_steps_trained: 60874

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,669,342755,6087474,4.84943,15.65,-0.53,51.7617




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6097470
  custom_metrics: {}
  date: 2021-11-22_18-10-32
  done: false
  episode_len_mean: 51.865284974093264
  episode_media: {}
  episode_reward_max: 15.510000000000009
  episode_reward_mean: 5.023367875647672
  episode_reward_min: -0.5800000000000003
  episodes_this_iter: 193
  episodes_total: 118877
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.047302767861799
          entropy_coeff: 0.01
          kl: 0.015661677546807043
          policy_loss: -0.07189504316338205
          total_loss: 0.10958614339432383
          vf_explained_var: 0.9105349183082581
          vf_loss: 0.166274954499415
    num_agent_steps_sampled: 6097470
    num_agent_steps_trained: 6097470
    num_steps_sampled: 6097470
    num_steps_trained: 6097

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,670,343308,6097470,5.02337,15.51,-0.58,51.8653




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6107466
  custom_metrics: {}
  date: 2021-11-22_18-19-43
  done: false
  episode_len_mean: 50.98461538461538
  episode_media: {}
  episode_reward_max: 15.780000000000005
  episode_reward_mean: 4.909230769230772
  episode_reward_min: -0.5500000000000003
  episodes_this_iter: 195
  episodes_total: 119072
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0649731078540463
          entropy_coeff: 0.01
          kl: 0.01567132195754745
          policy_loss: -0.06787280860846157
          total_loss: 0.08572503860692911
          vf_explained_var: 0.9492601156234741
          vf_loss: 0.13854634809180585
    num_agent_steps_sampled: 6107466
    num_agent_steps_trained: 6107466
    num_steps_sampled: 6107466
    num_steps_trained: 610

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,671,343859,6107466,4.90923,15.78,-0.55,50.9846


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6117462
  custom_metrics: {}
  date: 2021-11-22_18-28-39
  done: false
  episode_len_mean: 51.83419689119171
  episode_media: {}
  episode_reward_max: 13.680000000000005
  episode_reward_mean: 5.264248704663217
  episode_reward_min: -0.7000000000000004
  episodes_this_iter: 193
  episodes_total: 119265
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.030284382133599
          entropy_coeff: 0.01
          kl: 0.01491946258346376
          policy_loss: -0.06866228250481977
          total_loss: 0.08447788294376211
          vf_explained_var: 0.9422630071640015
          vf_loss: 0.13945460711104654
    num_agent_steps_sampled: 6117462
    num_agent_steps_trained: 6117462
    num_steps_sampled: 6117462
    num_steps_trained: 6117

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,672,344395,6117462,5.26425,13.68,-0.7,51.8342


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6127458
  custom_metrics: {}
  date: 2021-11-22_18-37-37
  done: false
  episode_len_mean: 51.59487179487179
  episode_media: {}
  episode_reward_max: 15.560000000000008
  episode_reward_mean: 5.30292307692308
  episode_reward_min: -0.4200000000000002
  episodes_this_iter: 195
  episodes_total: 119460
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0565980736989093
          entropy_coeff: 0.01
          kl: 0.014709150177070793
          policy_loss: -0.07487590799469783
          total_loss: 0.07654878527083862
          vf_explained_var: 0.9409933686256409
          vf_loss: 0.138481390188882
    num_agent_steps_sampled: 6127458
    num_agent_steps_trained: 6127458
    num_steps_sampled: 6127458
    num_steps_trained: 61274

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,673,344932,6127458,5.30292,15.56,-0.42,51.5949




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6137454
  custom_metrics: {}
  date: 2021-11-22_18-46-57
  done: false
  episode_len_mean: 52.415789473684214
  episode_media: {}
  episode_reward_max: 15.550000000000008
  episode_reward_mean: 5.050842105263161
  episode_reward_min: -0.6200000000000003
  episodes_this_iter: 190
  episodes_total: 119650
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0515198732715056
          entropy_coeff: 0.01
          kl: 0.015005564144110264
          policy_loss: -0.06977159687215183
          total_loss: 0.06946969872596416
          vf_explained_var: 0.9513999223709106
          vf_loss: 0.12557194299199226
    num_agent_steps_sampled: 6137454
    num_agent_steps_trained: 6137454
    num_steps_sampled: 6137454
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,674,345493,6137454,5.05084,15.55,-0.62,52.4158


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6147450
  custom_metrics: {}
  date: 2021-11-22_18-55-52
  done: false
  episode_len_mean: 52.068062827225134
  episode_media: {}
  episode_reward_max: 15.600000000000007
  episode_reward_mean: 5.111151832460737
  episode_reward_min: -0.5500000000000003
  episodes_this_iter: 191
  episodes_total: 119841
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0639675641155626
          entropy_coeff: 0.01
          kl: 0.015172693046424157
          policy_loss: -0.0680759010070724
          total_loss: 0.07847780183880337
          vf_explained_var: 0.944877564907074
          vf_loss: 0.13262808564657638
    num_agent_steps_sampled: 6147450
    num_agent_steps_trained: 6147450
    num_steps_sampled: 6147450
    num_steps_trained: 614

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,675,346027,6147450,5.11115,15.6,-0.55,52.0681




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6157446
  custom_metrics: {}
  date: 2021-11-22_19-05-00
  done: false
  episode_len_mean: 53.02105263157895
  episode_media: {}
  episode_reward_max: 17.700000000000003
  episode_reward_mean: 5.206631578947372
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 190
  episodes_total: 120031
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0381848420244624
          entropy_coeff: 0.01
          kl: 0.015006673001916033
          policy_loss: -0.06673954513675104
          total_loss: 0.08866516918277954
          vf_explained_var: 0.9312741160392761
          vf_loss: 0.1415994849390758
    num_agent_steps_sampled: 6157446
    num_agent_steps_trained: 6157446
    num_steps_sampled: 6157446
    num_steps_trained: 615

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,676,346576,6157446,5.20663,17.7,-0.51,53.0211




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6167442
  custom_metrics: {}
  date: 2021-11-22_19-14-12
  done: false
  episode_len_mean: 52.735449735449734
  episode_media: {}
  episode_reward_max: 15.380000000000011
  episode_reward_mean: 4.728571428571432
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 189
  episodes_total: 120220
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.075902780998184
          entropy_coeff: 0.01
          kl: 0.014764033430434097
          policy_loss: -0.0726697923859743
          total_loss: 0.07269753461136368
          vf_explained_var: 0.9393609762191772
          vf_loss: 0.13249204011613422
    num_agent_steps_sampled: 6167442
    num_agent_steps_trained: 6167442
    num_steps_sampled: 6167442
    num_steps_trained: 616

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,677,347128,6167442,4.72857,15.38,-0.52,52.7354


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6177438
  custom_metrics: {}
  date: 2021-11-22_19-23-12
  done: false
  episode_len_mean: 52.91005291005291
  episode_media: {}
  episode_reward_max: 13.650000000000006
  episode_reward_mean: 5.288624338624342
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 189
  episodes_total: 120409
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.04213205245604
          entropy_coeff: 0.01
          kl: 0.014716888478020364
          policy_loss: -0.06604335122507934
          total_loss: 0.08700215588984306
          vf_explained_var: 0.9426924586296082
          vf_loss: 0.1399399151676802
    num_agent_steps_sampled: 6177438
    num_agent_steps_trained: 6177438
    num_steps_sampled: 6177438
    num_steps_trained: 6177

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,678,347668,6177438,5.28862,13.65,-0.48,52.9101


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6187434
  custom_metrics: {}
  date: 2021-11-22_19-32-14
  done: false
  episode_len_mean: 52.518324607329845
  episode_media: {}
  episode_reward_max: 23.609999999999975
  episode_reward_mean: 5.471937172774873
  episode_reward_min: -0.47000000000000025
  episodes_this_iter: 191
  episodes_total: 120600
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0179906392193225
          entropy_coeff: 0.01
          kl: 0.015909215794979097
          policy_loss: -0.06548373314793328
          total_loss: 0.09338853779849028
          vf_explained_var: 0.9470499157905579
          vf_loss: 0.14280899474041978
    num_agent_steps_sampled: 6187434
    num_agent_steps_trained: 6187434
    num_steps_sampled: 6187434
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,679,348210,6187434,5.47194,23.61,-0.47,52.5183




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6197430
  custom_metrics: {}
  date: 2021-11-22_19-41-24
  done: false
  episode_len_mean: 53.149732620320854
  episode_media: {}
  episode_reward_max: 23.609999999999964
  episode_reward_mean: 5.229144385026743
  episode_reward_min: -0.46000000000000024
  episodes_this_iter: 187
  episodes_total: 120787
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0235907915366225
          entropy_coeff: 0.01
          kl: 0.01523975461452323
          policy_loss: -0.06779927657678098
          total_loss: 0.10198452566911295
          vf_explained_var: 0.9340812563896179
          vf_loss: 0.15530164249832312
    num_agent_steps_sampled: 6197430
    num_agent_steps_trained: 6197430
    num_steps_sampled: 6197430
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,680,348760,6197430,5.22914,23.61,-0.46,53.1497




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6207426
  custom_metrics: {}
  date: 2021-11-22_19-50-44
  done: false
  episode_len_mean: 53.148148148148145
  episode_media: {}
  episode_reward_max: 17.400000000000002
  episode_reward_mean: 5.148730158730163
  episode_reward_min: -0.6100000000000003
  episodes_this_iter: 189
  episodes_total: 120976
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0159726183577233
          entropy_coeff: 0.01
          kl: 0.015690927528856122
          policy_loss: -0.06979198286049891
          total_loss: 0.09694274922681966
          vf_explained_var: 0.9282658100128174
          vf_loss: 0.15114856316506053
    num_agent_steps_sampled: 6207426
    num_agent_steps_trained: 6207426
    num_steps_sampled: 6207426
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,681,349319,6207426,5.14873,17.4,-0.61,53.1481


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6217422
  custom_metrics: {}
  date: 2021-11-22_19-59-39
  done: false
  episode_len_mean: 52.62105263157895
  episode_media: {}
  episode_reward_max: 15.46000000000001
  episode_reward_mean: 5.335631578947373
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 190
  episodes_total: 121166
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0187400438220626
          entropy_coeff: 0.01
          kl: 0.014707570343269858
          policy_loss: -0.06532719498491206
          total_loss: 0.09255445916843678
          vf_explained_var: 0.9454560279846191
          vf_loss: 0.14456337024813076
    num_agent_steps_sampled: 6217422
    num_agent_steps_trained: 6217422
    num_steps_sampled: 6217422
    num_steps_trained: 62

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,682,349854,6217422,5.33563,15.46,-0.48,52.6211




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6227418
  custom_metrics: {}
  date: 2021-11-22_20-08-44
  done: false
  episode_len_mean: 53.66129032258065
  episode_media: {}
  episode_reward_max: 17.659999999999975
  episode_reward_mean: 5.080860215053768
  episode_reward_min: -0.47000000000000025
  episodes_this_iter: 186
  episodes_total: 121352
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0180623825774133
          entropy_coeff: 0.01
          kl: 0.015195738797933477
          policy_loss: -0.06224563511815783
          total_loss: 0.0947674511496185
          vf_explained_var: 0.9463649392127991
          vf_loss: 0.14257591660527222
    num_agent_steps_sampled: 6227418
    num_agent_steps_trained: 6227418
    num_steps_sampled: 6227418
    num_steps_trained: 62

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,683,350399,6227418,5.08086,17.66,-0.47,53.6613




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6237414
  custom_metrics: {}
  date: 2021-11-22_20-17-50
  done: false
  episode_len_mean: 53.51075268817204
  episode_media: {}
  episode_reward_max: 15.570000000000007
  episode_reward_mean: 5.4512903225806495
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 186
  episodes_total: 121538
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0074780194993957
          entropy_coeff: 0.01
          kl: 0.015297458353223816
          policy_loss: -0.0697105207299088
          total_loss: 0.08073006263844455
          vf_explained_var: 0.9507426023483276
          vf_loss: 0.13566583977136987
    num_agent_steps_sampled: 6237414
    num_agent_steps_trained: 6237414
    num_steps_sampled: 6237414
    num_steps_trained: 62

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,684,350945,6237414,5.45129,15.57,-0.5,53.5108




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6247410
  custom_metrics: {}
  date: 2021-11-22_20-27-00
  done: false
  episode_len_mean: 52.43455497382199
  episode_media: {}
  episode_reward_max: 15.610000000000008
  episode_reward_mean: 5.011937172774872
  episode_reward_min: -0.45000000000000023
  episodes_this_iter: 191
  episodes_total: 121729
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0276616194640775
          entropy_coeff: 0.01
          kl: 0.015006988422828145
          policy_loss: -0.06554726141754605
          total_loss: 0.08396808708695978
          vf_explained_var: 0.9361488819122314
          vf_loss: 0.13560416822940724
    num_agent_steps_sampled: 6247410
    num_agent_steps_trained: 6247410
    num_steps_sampled: 6247410
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,685,351496,6247410,5.01194,15.61,-0.45,52.4346




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6257406
  custom_metrics: {}
  date: 2021-11-22_20-36-11
  done: false
  episode_len_mean: 52.6578947368421
  episode_media: {}
  episode_reward_max: 15.650000000000006
  episode_reward_mean: 5.135105263157898
  episode_reward_min: -0.45000000000000023
  episodes_this_iter: 190
  episodes_total: 121919
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0071031097906182
          entropy_coeff: 0.01
          kl: 0.01514642076896838
          policy_loss: -0.06981962004121604
          total_loss: 0.08506938952977139
          vf_explained_var: 0.9174328446388245
          vf_loss: 0.14045459911331018
    num_agent_steps_sampled: 6257406
    num_agent_steps_trained: 6257406
    num_steps_sampled: 6257406
    num_steps_trained: 625

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,686,352046,6257406,5.13511,15.65,-0.45,52.6579




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6267402
  custom_metrics: {}
  date: 2021-11-22_20-45-21
  done: false
  episode_len_mean: 52.4869109947644
  episode_media: {}
  episode_reward_max: 15.630000000000006
  episode_reward_mean: 5.246910994764402
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 191
  episodes_total: 122110
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0081808263996996
          entropy_coeff: 0.01
          kl: 0.015511931208201253
          policy_loss: -0.06876373069524079
          total_loss: 0.09114956733154579
          vf_explained_var: 0.9407913088798523
          vf_loss: 0.1446569877023057
    num_agent_steps_sampled: 6267402
    num_agent_steps_trained: 6267402
    num_steps_sampled: 6267402
    num_steps_trained: 6267

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,687,352597,6267402,5.24691,15.63,-0.53,52.4869




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6277398
  custom_metrics: {}
  date: 2021-11-22_20-54-36
  done: false
  episode_len_mean: 53.74594594594595
  episode_media: {}
  episode_reward_max: 15.620000000000006
  episode_reward_mean: 5.382378378378383
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 185
  episodes_total: 122295
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.9988621851287214
          entropy_coeff: 0.01
          kl: 0.014641546661777747
          policy_loss: -0.06897174360795519
          total_loss: 0.06861480432871758
          vf_explained_var: 0.9369244575500488
          vf_loss: 0.12421989476833732
    num_agent_steps_sampled: 6277398
    num_agent_steps_trained: 6277398
    num_steps_sampled: 6277398
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,688,353152,6277398,5.38238,15.62,-0.48,53.7459


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6287394
  custom_metrics: {}
  date: 2021-11-22_21-03-29
  done: false
  episode_len_mean: 54.11891891891892
  episode_media: {}
  episode_reward_max: 21.660000000000004
  episode_reward_mean: 5.1313513513513564
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 185
  episodes_total: 122480
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.9840784682088108
          entropy_coeff: 0.01
          kl: 0.014291205735117798
          policy_loss: -0.06617232904847266
          total_loss: 0.07190199359273972
          vf_explained_var: 0.9304142594337463
          vf_loss: 0.12535795379265485
    num_agent_steps_sampled: 6287394
    num_agent_steps_trained: 6287394
    num_steps_sampled: 6287394
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,689,353685,6287394,5.13135,21.66,-0.51,54.1189




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6297390
  custom_metrics: {}
  date: 2021-11-22_21-12-36
  done: false
  episode_len_mean: 53.63978494623656
  episode_media: {}
  episode_reward_max: 15.630000000000006
  episode_reward_mean: 5.060161290322585
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 186
  episodes_total: 122666
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0070484389981114
          entropy_coeff: 0.01
          kl: 0.014423201310741445
          policy_loss: -0.06665969125352206
          total_loss: 0.0791279100581076
          vf_explained_var: 0.9392281770706177
          vf_loss: 0.13300022973723888
    num_agent_steps_sampled: 6297390
    num_agent_steps_trained: 6297390
    num_steps_sampled: 6297390
    num_steps_trained: 62

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,690,354232,6297390,5.06016,15.63,-0.48,53.6398




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6307386
  custom_metrics: {}
  date: 2021-11-22_21-21-46
  done: false
  episode_len_mean: 52.536842105263155
  episode_media: {}
  episode_reward_max: 17.629999999999992
  episode_reward_mean: 5.203105263157899
  episode_reward_min: -0.5500000000000003
  episodes_this_iter: 190
  episodes_total: 122856
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0165543754656152
          entropy_coeff: 0.01
          kl: 0.014493507042888383
          policy_loss: -0.06462987091177952
          total_loss: 0.08976941642418773
          vf_explained_var: 0.9152203798294067
          vf_loss: 0.1415468103325286
    num_agent_steps_sampled: 6307386
    num_agent_steps_trained: 6307386
    num_steps_sampled: 6307386
    num_steps_trained: 63

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,691,354781,6307386,5.20311,17.63,-0.55,52.5368




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6317382
  custom_metrics: {}
  date: 2021-11-22_21-30-55
  done: false
  episode_len_mean: 53.47089947089947
  episode_media: {}
  episode_reward_max: 15.530000000000008
  episode_reward_mean: 5.281481481481486
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 189
  episodes_total: 123045
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0211008860882984
          entropy_coeff: 0.01
          kl: 0.01520327627463231
          policy_loss: -0.07089085849625573
          total_loss: 0.08195313089701418
          vf_explained_var: 0.9317820072174072
          vf_loss: 0.13842003360001676
    num_agent_steps_sampled: 6317382
    num_agent_steps_trained: 6317382
    num_steps_sampled: 6317382
    num_steps_trained: 631

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,692,355330,6317382,5.28148,15.53,-0.52,53.4709




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6327378
  custom_metrics: {}
  date: 2021-11-22_21-40-32
  done: false
  episode_len_mean: 53.39784946236559
  episode_media: {}
  episode_reward_max: 17.480000000000004
  episode_reward_mean: 5.209086021505382
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 186
  episodes_total: 123231
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0231979004111156
          entropy_coeff: 0.01
          kl: 0.01506747645312324
          policy_loss: -0.06721715445953404
          total_loss: 0.07407876626356608
          vf_explained_var: 0.9428805112838745
          vf_loss: 0.1272023039688562
    num_agent_steps_sampled: 6327378
    num_agent_steps_trained: 6327378
    num_steps_sampled: 6327378
    num_steps_trained: 632

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,693,355907,6327378,5.20909,17.48,-0.49,53.3978


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6337374
  custom_metrics: {}
  date: 2021-11-22_21-49-25
  done: false
  episode_len_mean: 54.18378378378378
  episode_media: {}
  episode_reward_max: 19.619999999999976
  episode_reward_mean: 5.276378378378383
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 185
  episodes_total: 123416
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0183724411520134
          entropy_coeff: 0.01
          kl: 0.015034224484690809
          policy_loss: -0.06813436801099472
          total_loss: 0.08869789650116838
          vf_explained_var: 0.9234012365341187
          vf_loss: 0.14276614485301048
    num_agent_steps_sampled: 6337374
    num_agent_steps_trained: 6337374
    num_steps_sampled: 6337374
    num_steps_trained: 63

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,694,356440,6337374,5.27638,19.62,-0.51,54.1838


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6347370
  custom_metrics: {}
  date: 2021-11-22_21-58-19
  done: false
  episode_len_mean: 53.36898395721925
  episode_media: {}
  episode_reward_max: 15.610000000000008
  episode_reward_mean: 4.93304812834225
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 187
  episodes_total: 123603
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0193097075544686
          entropy_coeff: 0.01
          kl: 0.014540972270936963
          policy_loss: -0.06684829301230313
          total_loss: 0.07774028781061798
          vf_explained_var: 0.9363673329353333
          vf_loss: 0.13165552361922556
    num_agent_steps_sampled: 6347370
    num_agent_steps_trained: 6347370
    num_steps_sampled: 6347370
    num_steps_trained: 63

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,695,356974,6347370,4.93305,15.61,-0.48,53.369




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6357366
  custom_metrics: {}
  date: 2021-11-22_22-07-25
  done: false
  episode_len_mean: 53.60215053763441
  episode_media: {}
  episode_reward_max: 15.620000000000006
  episode_reward_mean: 4.875053763440864
  episode_reward_min: -0.47000000000000025
  episodes_this_iter: 186
  episodes_total: 123789
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.04625949160641
          entropy_coeff: 0.01
          kl: 0.01529556376491507
          policy_loss: -0.0715527188764596
          total_loss: 0.07938761548182109
          vf_explained_var: 0.9321158528327942
          vf_loss: 0.13655772177496903
    num_agent_steps_sampled: 6357366
    num_agent_steps_trained: 6357366
    num_steps_sampled: 6357366
    num_steps_trained: 63573

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,696,357520,6357366,4.87505,15.62,-0.47,53.6022




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6367362
  custom_metrics: {}
  date: 2021-11-22_22-16-33
  done: false
  episode_len_mean: 53.63101604278075
  episode_media: {}
  episode_reward_max: 15.679999999999996
  episode_reward_mean: 4.921818181818186
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 187
  episodes_total: 123976
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.031780816680456
          entropy_coeff: 0.01
          kl: 0.014865892527562482
          policy_loss: -0.06686194023600164
          total_loss: 0.07791548854762353
          vf_explained_var: 0.9258376359939575
          vf_loss: 0.13122887428088795
    num_agent_steps_sampled: 6367362
    num_agent_steps_trained: 6367362
    num_steps_sampled: 6367362
    num_steps_trained: 636

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,697,358068,6367362,4.92182,15.68,-0.51,53.631


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6377358
  custom_metrics: {}
  date: 2021-11-22_22-25-30
  done: false
  episode_len_mean: 53.16489361702128
  episode_media: {}
  episode_reward_max: 15.660000000000005
  episode_reward_mean: 5.184148936170217
  episode_reward_min: -0.47000000000000025
  episodes_this_iter: 188
  episodes_total: 124164
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.039472599848207
          entropy_coeff: 0.01
          kl: 0.014541479412358251
          policy_loss: -0.06355821956035561
          total_loss: 0.07918307362425617
          vf_explained_var: 0.9274757504463196
          vf_loss: 0.13000871040372095
    num_agent_steps_sampled: 6377358
    num_agent_steps_trained: 6377358
    num_steps_sampled: 6377358
    num_steps_trained: 63

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,698,358605,6377358,5.18415,15.66,-0.47,53.1649




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6387354
  custom_metrics: {}
  date: 2021-11-22_22-34-38
  done: false
  episode_len_mean: 53.88108108108108
  episode_media: {}
  episode_reward_max: 17.650000000000002
  episode_reward_mean: 5.450648648648654
  episode_reward_min: -0.45000000000000023
  episodes_this_iter: 185
  episodes_total: 124349
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0270362399428725
          entropy_coeff: 0.01
          kl: 0.015472279221111215
          policy_loss: -0.06736734081812712
          total_loss: 0.093113055103026
          vf_explained_var: 0.9427529573440552
          vf_loss: 0.14550297200010273
    num_agent_steps_sampled: 6387354
    num_agent_steps_trained: 6387354
    num_steps_sampled: 6387354
    num_steps_trained: 638

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,699,359153,6387354,5.45065,17.65,-0.45,53.8811




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6397350
  custom_metrics: {}
  date: 2021-11-22_22-43-47
  done: false
  episode_len_mean: 53.095238095238095
  episode_media: {}
  episode_reward_max: 15.620000000000006
  episode_reward_mean: 5.4620105820105875
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 189
  episodes_total: 124538
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0221529467996344
          entropy_coeff: 0.01
          kl: 0.015695773322034253
          policy_loss: -0.06529095137763458
          total_loss: 0.0894277782676328
          vf_explained_var: 0.9492059946060181
          vf_loss: 0.13918332331972832
    num_agent_steps_sampled: 6397350
    num_agent_steps_trained: 6397350
    num_steps_sampled: 6397350
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,700,359702,6397350,5.46201,15.62,-0.48,53.0952




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6407346
  custom_metrics: {}
  date: 2021-11-22_22-52-59
  done: false
  episode_len_mean: 52.7989417989418
  episode_media: {}
  episode_reward_max: 17.649999999999984
  episode_reward_mean: 5.341428571428575
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 189
  episodes_total: 124727
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0185632447880435
          entropy_coeff: 0.01
          kl: 0.016130757279548845
          policy_loss: -0.06547668036441336
          total_loss: 0.10680024154261844
          vf_explained_var: 0.9354764819145203
          vf_loss: 0.15571467096376865
    num_agent_steps_sampled: 6407346
    num_agent_steps_trained: 6407346
    num_steps_sampled: 6407346
    num_steps_trained: 640

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,701,360254,6407346,5.34143,17.65,-0.52,52.7989


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6417342
  custom_metrics: {}
  date: 2021-11-22_23-01-52
  done: false
  episode_len_mean: 54.005405405405405
  episode_media: {}
  episode_reward_max: 15.44000000000001
  episode_reward_mean: 4.9456756756756795
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 185
  episodes_total: 124912
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.016457175598087
          entropy_coeff: 0.01
          kl: 0.014400601680144754
          policy_loss: -0.06792697789631158
          total_loss: 0.08426916646276952
          vf_explained_var: 0.9188170433044434
          vf_loss: 0.13955434448708476
    num_agent_steps_sampled: 6417342
    num_agent_steps_trained: 6417342
    num_steps_sampled: 6417342
    num_steps_trained: 64

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,702,360787,6417342,4.94568,15.44,-0.52,54.0054




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6427338
  custom_metrics: {}
  date: 2021-11-22_23-11-04
  done: false
  episode_len_mean: 53.543010752688176
  episode_media: {}
  episode_reward_max: 17.46999999999999
  episode_reward_mean: 5.594946236559144
  episode_reward_min: -0.47000000000000025
  episodes_this_iter: 186
  episodes_total: 125098
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0075360784329566
          entropy_coeff: 0.01
          kl: 0.014840582646898545
          policy_loss: -0.07115433586728453
          total_loss: 0.07213278898619989
          vf_explained_var: 0.9429711103439331
          vf_loss: 0.12955378208040114
    num_agent_steps_sampled: 6427338
    num_agent_steps_trained: 6427338
    num_steps_sampled: 6427338
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,703,361339,6427338,5.59495,17.47,-0.47,53.543




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6437334
  custom_metrics: {}
  date: 2021-11-22_23-20-32
  done: false
  episode_len_mean: 54.69398907103825
  episode_media: {}
  episode_reward_max: 15.63
  episode_reward_mean: 5.282404371584703
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 183
  episodes_total: 125281
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.021175551725679
          entropy_coeff: 0.01
          kl: 0.015172329298821686
          policy_loss: -0.0725542885724339
          total_loss: 0.07369460706041248
          vf_explained_var: 0.9382842183113098
          vf_loss: 0.13189618706059864
    num_agent_steps_sampled: 6437334
    num_agent_steps_trained: 6437334
    num_steps_sampled: 6437334
    num_steps_trained: 6437334
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,704,361906,6437334,5.2824,15.63,-0.53,54.694




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6447330
  custom_metrics: {}
  date: 2021-11-22_23-29-33
  done: false
  episode_len_mean: 54.59239130434783
  episode_media: {}
  episode_reward_max: 17.399999999999974
  episode_reward_mean: 4.696793478260873
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 184
  episodes_total: 125465
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.051276261428274
          entropy_coeff: 0.01
          kl: 0.015236836212585992
          policy_loss: -0.07254370771971229
          total_loss: 0.07991620701674644
          vf_explained_var: 0.9339280724525452
          vf_loss: 0.13826125850807502
    num_agent_steps_sampled: 6447330
    num_agent_steps_trained: 6447330
    num_steps_sampled: 6447330
    num_steps_trained: 644

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,705,362448,6447330,4.69679,17.4,-0.53,54.5924


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6457326
  custom_metrics: {}
  date: 2021-11-22_23-38-23
  done: false
  episode_len_mean: 53.526881720430104
  episode_media: {}
  episode_reward_max: 17.58999999999999
  episode_reward_mean: 5.2943548387096815
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 186
  episodes_total: 125651
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0072182581846016
          entropy_coeff: 0.01
          kl: 0.015260505883165059
          policy_loss: -0.0687842626112903
          total_loss: 0.08153812277105116
          vf_explained_var: 0.9304132461547852
          vf_loss: 0.13562922683090017
    num_agent_steps_sampled: 6457326
    num_agent_steps_trained: 6457326
    num_steps_sampled: 6457326
    num_steps_trained: 64

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,706,362978,6457326,5.29435,17.59,-0.51,53.5269




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6467322
  custom_metrics: {}
  date: 2021-11-22_23-47-29
  done: false
  episode_len_mean: 53.598930481283425
  episode_media: {}
  episode_reward_max: 15.580000000000007
  episode_reward_mean: 5.2485026737967955
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 187
  episodes_total: 125838
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.9801962162596154
          entropy_coeff: 0.01
          kl: 0.014584808044756824
          policy_loss: -0.07193864618941123
          total_loss: 0.06458621491689616
          vf_explained_var: 0.9472094178199768
          vf_loss: 0.12310080568663342
    num_agent_steps_sampled: 6467322
    num_agent_steps_trained: 6467322
    num_steps_sampled: 6467322
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,707,363523,6467322,5.2485,15.58,-0.54,53.5989




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6477318
  custom_metrics: {}
  date: 2021-11-22_23-56-28
  done: false
  episode_len_mean: 54.10326086956522
  episode_media: {}
  episode_reward_max: 15.720000000000004
  episode_reward_mean: 5.495760869565221
  episode_reward_min: -0.6200000000000003
  episodes_this_iter: 184
  episodes_total: 126022
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0020317662193117
          entropy_coeff: 0.01
          kl: 0.01561274851063752
          policy_loss: -0.06638791759932973
          total_loss: 0.09048963776951942
          vf_explained_var: 0.9459840059280396
          vf_loss: 0.14133007761837174
    num_agent_steps_sampled: 6477318
    num_agent_steps_trained: 6477318
    num_steps_sampled: 6477318
    num_steps_trained: 647

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,708,364063,6477318,5.49576,15.72,-0.62,54.1033




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6487314
  custom_metrics: {}
  date: 2021-11-23_00-05-32
  done: false
  episode_len_mean: 53.67379679144385
  episode_media: {}
  episode_reward_max: 17.67
  episode_reward_mean: 4.91588235294118
  episode_reward_min: -0.5500000000000003
  episodes_this_iter: 187
  episodes_total: 126209
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.021216569392078
          entropy_coeff: 0.01
          kl: 0.014252937406465861
          policy_loss: -0.07412920190473309
          total_loss: 0.06163863744616968
          vf_explained_var: 0.948100209236145
          vf_loss: 0.12351003146326416
    num_agent_steps_sampled: 6487314
    num_agent_steps_trained: 6487314
    num_steps_sampled: 6487314
    num_steps_trained: 6487314
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,709,364606,6487314,4.91588,17.67,-0.55,53.6738




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6497310
  custom_metrics: {}
  date: 2021-11-23_00-14-42
  done: false
  episode_len_mean: 54.69230769230769
  episode_media: {}
  episode_reward_max: 13.590000000000005
  episode_reward_mean: 4.751538461538466
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 182
  episodes_total: 126391
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.053860915497125
          entropy_coeff: 0.01
          kl: 0.014294156028454838
          policy_loss: -0.06801560786951151
          total_loss: 0.061552499492917336
          vf_explained_var: 0.93178790807724
          vf_loss: 0.11754284116680767
    num_agent_steps_sampled: 6497310
    num_agent_steps_trained: 6497310
    num_steps_sampled: 6497310
    num_steps_trained: 6497

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,710,365156,6497310,4.75154,13.59,-0.5,54.6923




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6507306
  custom_metrics: {}
  date: 2021-11-23_00-23-54
  done: false
  episode_len_mean: 54.74863387978142
  episode_media: {}
  episode_reward_max: 15.670000000000005
  episode_reward_mean: 4.820710382513665
  episode_reward_min: -0.4900000000000002
  episodes_this_iter: 183
  episodes_total: 126574
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0236870214402916
          entropy_coeff: 0.01
          kl: 0.01529468183287922
          policy_loss: -0.07152445904664409
          total_loss: 0.07381436014837595
          vf_explained_var: 0.9404972195625305
          vf_loss: 0.13073249040330465
    num_agent_steps_sampled: 6507306
    num_agent_steps_trained: 6507306
    num_steps_sampled: 6507306
    num_steps_trained: 650

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,711,365708,6507306,4.82071,15.67,-0.49,54.7486




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6517302
  custom_metrics: {}
  date: 2021-11-23_00-33-22
  done: false
  episode_len_mean: 55.111111111111114
  episode_media: {}
  episode_reward_max: 13.670000000000003
  episode_reward_mean: 5.396611111111115
  episode_reward_min: -0.5500000000000003
  episodes_this_iter: 180
  episodes_total: 126754
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.983930199093608
          entropy_coeff: 0.01
          kl: 0.015815388500204145
          policy_loss: -0.06857226190268449
          total_loss: 0.10990444685519607
          vf_explained_var: 0.9325550198554993
          vf_loss: 0.1622865775975971
    num_agent_steps_sampled: 6517302
    num_agent_steps_trained: 6517302
    num_steps_sampled: 6517302
    num_steps_trained: 651

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,712,366277,6517302,5.39661,13.67,-0.55,55.1111


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6527298
  custom_metrics: {}
  date: 2021-11-23_00-42-12
  done: false
  episode_len_mean: 55.2967032967033
  episode_media: {}
  episode_reward_max: 13.610000000000007
  episode_reward_mean: 4.702362637362642
  episode_reward_min: -0.4000000000000003
  episodes_this_iter: 182
  episodes_total: 126936
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0360327816153148
          entropy_coeff: 0.01
          kl: 0.01428248793534366
          policy_loss: -0.07234018166078116
          total_loss: 0.06674718681140979
          vf_explained_var: 0.9390999674797058
          vf_loss: 0.12691040299150225
    num_agent_steps_sampled: 6527298
    num_agent_steps_trained: 6527298
    num_steps_sampled: 6527298
    num_steps_trained: 6527

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,713,366807,6527298,4.70236,13.61,-0.4,55.2967




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6537294
  custom_metrics: {}
  date: 2021-11-23_00-51-16
  done: false
  episode_len_mean: 54.47540983606557
  episode_media: {}
  episode_reward_max: 15.700000000000005
  episode_reward_mean: 4.879016393442627
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 183
  episodes_total: 127119
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0234951707493347
          entropy_coeff: 0.01
          kl: 0.01427482638285025
          policy_loss: -0.06438033077269152
          total_loss: 0.07747359251819337
          vf_explained_var: 0.9454240798950195
          vf_loss: 0.12956903455292723
    num_agent_steps_sampled: 6537294
    num_agent_steps_trained: 6537294
    num_steps_sampled: 6537294
    num_steps_trained: 653

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,714,367350,6537294,4.87902,15.7,-0.52,54.4754


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6547290
  custom_metrics: {}
  date: 2021-11-23_01-00-10
  done: false
  episode_len_mean: 53.93582887700535
  episode_media: {}
  episode_reward_max: 15.590000000000007
  episode_reward_mean: 4.945401069518721
  episode_reward_min: -0.5900000000000003
  episodes_this_iter: 187
  episodes_total: 127306
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0377280809075
          entropy_coeff: 0.01
          kl: 0.014336124149429847
          policy_loss: -0.07108926157601833
          total_loss: 0.06355235228473022
          vf_explained_var: 0.9419111013412476
          vf_loss: 0.12235941091845987
    num_agent_steps_sampled: 6547290
    num_agent_steps_trained: 6547290
    num_steps_sampled: 6547290
    num_steps_trained: 65472

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,715,367885,6547290,4.9454,15.59,-0.59,53.9358




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6557286
  custom_metrics: {}
  date: 2021-11-23_01-09-28
  done: false
  episode_len_mean: 53.24731182795699
  episode_media: {}
  episode_reward_max: 15.650000000000006
  episode_reward_mean: 5.277096774193552
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 186
  episodes_total: 127492
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.027858872155109
          entropy_coeff: 0.01
          kl: 0.015211364579225445
          policy_loss: -0.06875400648060025
          total_loss: 0.08060448919847209
          vf_explained_var: 0.927414059638977
          vf_loss: 0.13498369319865622
    num_agent_steps_sampled: 6557286
    num_agent_steps_trained: 6557286
    num_steps_sampled: 6557286
    num_steps_trained: 6557

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,716,368442,6557286,5.2771,15.65,-0.51,53.2473




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6567282
  custom_metrics: {}
  date: 2021-11-23_01-18-32
  done: false
  episode_len_mean: 53.68817204301075
  episode_media: {}
  episode_reward_max: 19.669999999999995
  episode_reward_mean: 5.602204301075273
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 186
  episodes_total: 127678
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.9887732089044579
          entropy_coeff: 0.01
          kl: 0.01590164342281052
          policy_loss: -0.06334871136415719
          total_loss: 0.09726317634602778
          vf_explained_var: 0.9500916004180908
          vf_loss: 0.14427368740614757
    num_agent_steps_sampled: 6567282
    num_agent_steps_trained: 6567282
    num_steps_sampled: 6567282
    num_steps_trained: 65

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,717,368987,6567282,5.6022,19.67,-0.48,53.6882


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6577278
  custom_metrics: {}
  date: 2021-11-23_01-27-29
  done: false
  episode_len_mean: 53.41711229946524
  episode_media: {}
  episode_reward_max: 13.630000000000006
  episode_reward_mean: 5.079518716577544
  episode_reward_min: -0.6000000000000005
  episodes_this_iter: 187
  episodes_total: 127865
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.013868046165949
          entropy_coeff: 0.01
          kl: 0.015093428841346963
          policy_loss: -0.07115614965596653
          total_loss: 0.07254605087210625
          vf_explained_var: 0.9417508840560913
          vf_loss: 0.12945616233385127
    num_agent_steps_sampled: 6577278
    num_agent_steps_trained: 6577278
    num_steps_sampled: 6577278
    num_steps_trained: 657

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,718,369523,6577278,5.07952,13.63,-0.6,53.4171




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6587274
  custom_metrics: {}
  date: 2021-11-23_01-36-36
  done: false
  episode_len_mean: 53.15873015873016
  episode_media: {}
  episode_reward_max: 13.680000000000005
  episode_reward_mean: 4.998677248677252
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 189
  episodes_total: 128054
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.041509723495767
          entropy_coeff: 0.01
          kl: 0.015404822001477763
          policy_loss: -0.06788019376954363
          total_loss: 0.08982635223084842
          vf_explained_var: 0.9439417123794556
          vf_loss: 0.14302753101824875
    num_agent_steps_sampled: 6587274
    num_agent_steps_trained: 6587274
    num_steps_sampled: 6587274
    num_steps_trained: 658

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,719,370070,6587274,4.99868,13.68,-0.54,53.1587




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6597270
  custom_metrics: {}
  date: 2021-11-23_01-45-44
  done: false
  episode_len_mean: 53.76881720430107
  episode_media: {}
  episode_reward_max: 15.510000000000009
  episode_reward_mean: 4.842204301075273
  episode_reward_min: -0.5600000000000003
  episodes_this_iter: 186
  episodes_total: 128240
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0554941247505356
          entropy_coeff: 0.01
          kl: 0.014867579128749957
          policy_loss: -0.0692152873225744
          total_loss: 0.08226771387462686
          vf_explained_var: 0.929538369178772
          vf_loss: 0.13816773801673593
    num_agent_steps_sampled: 6597270
    num_agent_steps_trained: 6597270
    num_steps_sampled: 6597270
    num_steps_trained: 6597

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,720,370618,6597270,4.8422,15.51,-0.56,53.7688




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6607266
  custom_metrics: {}
  date: 2021-11-23_01-55-45
  done: false
  episode_len_mean: 53.72432432432432
  episode_media: {}
  episode_reward_max: 15.590000000000007
  episode_reward_mean: 4.8572972972973005
  episode_reward_min: -0.6600000000000004
  episodes_this_iter: 185
  episodes_total: 128425
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.016139799500086
          entropy_coeff: 0.01
          kl: 0.014548022383250049
          policy_loss: -0.0659204858024095
          total_loss: 0.07948334736779014
          vf_explained_var: 0.9100826978683472
          vf_loss: 0.13242301662418857
    num_agent_steps_sampled: 6607266
    num_agent_steps_trained: 6607266
    num_steps_sampled: 6607266
    num_steps_trained: 660

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,721,371220,6607266,4.8573,15.59,-0.66,53.7243




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6617262
  custom_metrics: {}
  date: 2021-11-23_02-05-00
  done: false
  episode_len_mean: 53.87096774193548
  episode_media: {}
  episode_reward_max: 15.620000000000005
  episode_reward_mean: 5.360268817204306
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 186
  episodes_total: 128611
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.9914565632142216
          entropy_coeff: 0.01
          kl: 0.014523432371254998
          policy_loss: -0.06712876162906381
          total_loss: 0.08716436327463471
          vf_explained_var: 0.9443734884262085
          vf_loss: 0.14112149471450733
    num_agent_steps_sampled: 6617262
    num_agent_steps_trained: 6617262
    num_steps_sampled: 6617262
    num_steps_trained: 66

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,722,371775,6617262,5.36027,15.62,-0.54,53.871




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6627258
  custom_metrics: {}
  date: 2021-11-23_02-14-03
  done: false
  episode_len_mean: 54.043010752688176
  episode_media: {}
  episode_reward_max: 13.660000000000005
  episode_reward_mean: 5.31913978494624
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 186
  episodes_total: 128797
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0104186823808523
          entropy_coeff: 0.01
          kl: 0.015130945948406034
          policy_loss: -0.06835061655470036
          total_loss: 0.09260573820105214
          vf_explained_var: 0.9302023649215698
          vf_loss: 0.1465903543423951
    num_agent_steps_sampled: 6627258
    num_agent_steps_trained: 6627258
    num_steps_sampled: 6627258
    num_steps_trained: 66

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,723,372317,6627258,5.31914,13.66,-0.49,54.043


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6637254
  custom_metrics: {}
  date: 2021-11-23_02-22-52
  done: false
  episode_len_mean: 54.09782608695652
  episode_media: {}
  episode_reward_max: 17.73
  episode_reward_mean: 4.896195652173917
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 184
  episodes_total: 128981
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0501942627401237
          entropy_coeff: 0.01
          kl: 0.013996323719482917
          policy_loss: -0.07002075888230316
          total_loss: 0.07170827603151929
          vf_explained_var: 0.9385868906974792
          vf_loss: 0.13034560083856725
    num_agent_steps_sampled: 6637254
    num_agent_steps_trained: 6637254
    num_steps_sampled: 6637254
    num_steps_trained: 6637254
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,724,372846,6637254,4.8962,17.73,-0.51,54.0978




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6647250
  custom_metrics: {}
  date: 2021-11-23_02-32-15
  done: false
  episode_len_mean: 53.202127659574465
  episode_media: {}
  episode_reward_max: 13.630000000000006
  episode_reward_mean: 5.290372340425536
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 188
  episodes_total: 129169
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.005004085163515
          entropy_coeff: 0.01
          kl: 0.014091714459911954
          policy_loss: -0.07107668719234418
          total_loss: 0.06764818204960994
          vf_explained_var: 0.9491779804229736
          vf_loss: 0.12667222128513392
    num_agent_steps_sampled: 6647250
    num_agent_steps_trained: 6647250
    num_steps_sampled: 6647250
    num_steps_trained: 66

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,725,373409,6647250,5.29037,13.63,-0.5,53.2021


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6657246
  custom_metrics: {}
  date: 2021-11-23_02-41-05
  done: false
  episode_len_mean: 54.026881720430104
  episode_media: {}
  episode_reward_max: 15.640000000000004
  episode_reward_mean: 5.087580645161294
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 186
  episodes_total: 129355
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0249487051044603
          entropy_coeff: 0.01
          kl: 0.014186976646430703
          policy_loss: -0.069381324855982
          total_loss: 0.06522895776245251
          vf_explained_var: 0.9491057991981506
          vf_loss: 0.12254006234025695
    num_agent_steps_sampled: 6657246
    num_agent_steps_trained: 6657246
    num_steps_sampled: 6657246
    num_steps_trained: 665

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,726,373939,6657246,5.08758,15.64,-0.5,54.0269




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6667242
  custom_metrics: {}
  date: 2021-11-23_02-50-23
  done: false
  episode_len_mean: 53.365591397849464
  episode_media: {}
  episode_reward_max: 15.640000000000002
  episode_reward_mean: 5.297903225806456
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 186
  episodes_total: 129541
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.9869587139910962
          entropy_coeff: 0.01
          kl: 0.014646302367318769
          policy_loss: -0.06977314276539337
          total_loss: 0.07214184259300015
          vf_explained_var: 0.9497321248054504
          vf_loss: 0.12841846462410228
    num_agent_steps_sampled: 6667242
    num_agent_steps_trained: 6667242
    num_steps_sampled: 6667242
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,727,374497,6667242,5.2979,15.64,-0.54,53.3656




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6677238
  custom_metrics: {}
  date: 2021-11-23_02-59-39
  done: false
  episode_len_mean: 54.25945945945946
  episode_media: {}
  episode_reward_max: 15.680000000000007
  episode_reward_mean: 5.197729729729734
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 185
  episodes_total: 129726
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0115535436145753
          entropy_coeff: 0.01
          kl: 0.01514730364971481
          policy_loss: -0.06819287885441426
          total_loss: 0.08412062322735671
          vf_explained_var: 0.9303103089332581
          vf_loss: 0.13792158600399232
    num_agent_steps_sampled: 6677238
    num_agent_steps_trained: 6677238
    num_steps_sampled: 6677238
    num_steps_trained: 66

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,728,375053,6677238,5.19773,15.68,-0.48,54.2595


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6687234
  custom_metrics: {}
  date: 2021-11-23_03-08-31
  done: false
  episode_len_mean: 53.494623655913976
  episode_media: {}
  episode_reward_max: 15.620000000000006
  episode_reward_mean: 4.759623655913982
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 186
  episodes_total: 129912
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.02263046943519
          entropy_coeff: 0.01
          kl: 0.015084784325662412
          policy_loss: -0.07045661804813608
          total_loss: 0.08319957280314211
          vf_explained_var: 0.9368581771850586
          vf_loss: 0.1395174702019786
    num_agent_steps_sampled: 6687234
    num_agent_steps_trained: 6687234
    num_steps_sampled: 6687234
    num_steps_trained: 6687

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,729,375585,6687234,4.75962,15.62,-0.53,53.4946


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6697230
  custom_metrics: {}
  date: 2021-11-23_03-17-30
  done: false
  episode_len_mean: 53.76881720430107
  episode_media: {}
  episode_reward_max: 15.600000000000007
  episode_reward_mean: 5.111075268817208
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 186
  episodes_total: 130098
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.993206834410089
          entropy_coeff: 0.01
          kl: 0.014813523490175446
          policy_loss: -0.07076853191836154
          total_loss: 0.08124375258081842
          vf_explained_var: 0.93455970287323
          vf_loss: 0.1381972934590675
    num_agent_steps_sampled: 6697230
    num_agent_steps_trained: 6697230
    num_steps_sampled: 6697230
    num_steps_trained: 669723

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,730,376124,6697230,5.11108,15.6,-0.5,53.7688




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6707226
  custom_metrics: {}
  date: 2021-11-23_03-26-35
  done: false
  episode_len_mean: 53.844919786096256
  episode_media: {}
  episode_reward_max: 15.690000000000005
  episode_reward_mean: 5.5529411764705925
  episode_reward_min: -0.42000000000000015
  episodes_this_iter: 187
  episodes_total: 130285
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.9767733864276764
          entropy_coeff: 0.01
          kl: 0.015086560335890765
          policy_loss: -0.06887513068841838
          total_loss: 0.08490015367700485
          vf_explained_var: 0.9458453059196472
          vf_loss: 0.1391739463487857
    num_agent_steps_sampled: 6707226
    num_agent_steps_trained: 6707226
    num_steps_sampled: 6707226
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,731,376669,6707226,5.55294,15.69,-0.42,53.8449




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6717222
  custom_metrics: {}
  date: 2021-11-23_03-35-49
  done: false
  episode_len_mean: 55.105555555555554
  episode_media: {}
  episode_reward_max: 15.520000000000008
  episode_reward_mean: 4.95855555555556
  episode_reward_min: -0.5500000000000003
  episodes_this_iter: 180
  episodes_total: 130465
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.994721248518511
          entropy_coeff: 0.01
          kl: 0.014233822087427916
          policy_loss: -0.06685210045361455
          total_loss: 0.07177476423953723
          vf_explained_var: 0.9386142492294312
          vf_loss: 0.12614765027818461
    num_agent_steps_sampled: 6717222
    num_agent_steps_trained: 6717222
    num_steps_sampled: 6717222
    num_steps_trained: 671

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,732,377223,6717222,4.95856,15.52,-0.55,55.1056




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6727218
  custom_metrics: {}
  date: 2021-11-23_03-44-58
  done: false
  episode_len_mean: 55.25414364640884
  episode_media: {}
  episode_reward_max: 17.669999999999977
  episode_reward_mean: 5.197071823204424
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 181
  episodes_total: 130646
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0140820068767273
          entropy_coeff: 0.01
          kl: 0.01457710924292293
          policy_loss: -0.0698292033850689
          total_loss: 0.06908995501942547
          vf_explained_var: 0.9304777979850769
          vf_loss: 0.12585149971201612
    num_agent_steps_sampled: 6727218
    num_agent_steps_trained: 6727218
    num_steps_sampled: 6727218
    num_steps_trained: 6727

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,733,377772,6727218,5.19707,17.67,-0.52,55.2541


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6737214
  custom_metrics: {}
  date: 2021-11-23_03-53-45
  done: false
  episode_len_mean: 55.52777777777778
  episode_media: {}
  episode_reward_max: 15.680000000000005
  episode_reward_mean: 5.211722222222226
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 180
  episodes_total: 130826
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.9992542268760711
          entropy_coeff: 0.01
          kl: 0.015106067891157496
          policy_loss: -0.06896288548348996
          total_loss: 0.08537443506942377
          vf_explained_var: 0.9258055686950684
          vf_loss: 0.13991635120183932
    num_agent_steps_sampled: 6737214
    num_agent_steps_trained: 6737214
    num_steps_sampled: 6737214
    num_steps_trained: 67

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,734,378299,6737214,5.21172,15.68,-0.54,55.5278




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6747210
  custom_metrics: {}
  date: 2021-11-23_04-02-53
  done: false
  episode_len_mean: 54.31521739130435
  episode_media: {}
  episode_reward_max: 17.45
  episode_reward_mean: 5.047880434782613
  episode_reward_min: -0.4900000000000002
  episodes_this_iter: 184
  episodes_total: 131010
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.026323837353044
          entropy_coeff: 0.01
          kl: 0.015055550449222493
          policy_loss: -0.0684745076232897
          total_loss: 0.08553915830803839
          vf_explained_var: 0.9286423921585083
          vf_loss: 0.13997847705604471
    num_agent_steps_sampled: 6747210
    num_agent_steps_trained: 6747210
    num_steps_sampled: 6747210
    num_steps_trained: 6747210
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,735,378846,6747210,5.04788,17.45,-0.49,54.3152




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6757206
  custom_metrics: {}
  date: 2021-11-23_04-11-54
  done: false
  episode_len_mean: 54.73224043715847
  episode_media: {}
  episode_reward_max: 15.630000000000006
  episode_reward_mean: 5.459344262295087
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 183
  episodes_total: 131193
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.9960527467679785
          entropy_coeff: 0.01
          kl: 0.01587797998276952
          policy_loss: -0.06698987649780873
          total_loss: 0.1096873553817668
          vf_explained_var: 0.9214758276939392
          vf_loss: 0.16046573482921356
    num_agent_steps_sampled: 6757206
    num_agent_steps_trained: 6757206
    num_steps_sampled: 6757206
    num_steps_trained: 6757

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,736,379388,6757206,5.45934,15.63,-0.53,54.7322




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6767202
  custom_metrics: {}
  date: 2021-11-23_04-20-53
  done: false
  episode_len_mean: 55.0
  episode_media: {}
  episode_reward_max: 15.610000000000005
  episode_reward_mean: 5.293867403314922
  episode_reward_min: -0.7300000000000004
  episodes_this_iter: 181
  episodes_total: 131374
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0094263663493006
          entropy_coeff: 0.01
          kl: 0.015672517781829463
          policy_loss: -0.06737209919921028
          total_loss: 0.09250488977226319
          vf_explained_var: 0.9398441314697266
          vf_loss: 0.14426729640863598
    num_agent_steps_sampled: 6767202
    num_agent_steps_trained: 6767202
    num_steps_sampled: 6767202
    num_steps_trained: 6767202
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,737,379927,6767202,5.29387,15.61,-0.73,55


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6777198
  custom_metrics: {}
  date: 2021-11-23_04-29-44
  done: false
  episode_len_mean: 53.73262032085562
  episode_media: {}
  episode_reward_max: 17.54999999999998
  episode_reward_mean: 4.934064171122999
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 187
  episodes_total: 131561
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.021057398084656
          entropy_coeff: 0.01
          kl: 0.014826075400119642
          policy_loss: -0.06782338378603694
          total_loss: 0.08821818683871638
          vf_explained_var: 0.9333072304725647
          vf_loss: 0.14247649084378303
    num_agent_steps_sampled: 6777198
    num_agent_steps_trained: 6777198
    num_steps_sampled: 6777198
    num_steps_trained: 6777

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,738,380457,6777198,4.93406,17.55,-0.51,53.7326




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6787194
  custom_metrics: {}
  date: 2021-11-23_04-38-45
  done: false
  episode_len_mean: 54.568306010928964
  episode_media: {}
  episode_reward_max: 13.660000000000005
  episode_reward_mean: 4.72355191256831
  episode_reward_min: -0.6500000000000004
  episodes_this_iter: 183
  episodes_total: 131744
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0341806427422298
          entropy_coeff: 0.01
          kl: 0.014312358619233714
          policy_loss: -0.06552722804110644
          total_loss: 0.09102612148152069
          vf_explained_var: 0.9256945848464966
          vf_loss: 0.14428981231463825
    num_agent_steps_sampled: 6787194
    num_agent_steps_trained: 6787194
    num_steps_sampled: 6787194
    num_steps_trained: 67

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,739,380999,6787194,4.72355,13.66,-0.65,54.5683




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6797190
  custom_metrics: {}
  date: 2021-11-23_04-47-49
  done: false
  episode_len_mean: 54.65217391304348
  episode_media: {}
  episode_reward_max: 17.41000000000001
  episode_reward_mean: 5.012608695652178
  episode_reward_min: -0.6400000000000003
  episodes_this_iter: 184
  episodes_total: 131928
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.019425179824293
          entropy_coeff: 0.01
          kl: 0.015889405770208635
          policy_loss: -0.07022276054996625
          total_loss: 0.10038081736936136
          vf_explained_var: 0.9227597117424011
          vf_loss: 0.15459977524910481
    num_agent_steps_sampled: 6797190
    num_agent_steps_trained: 6797190
    num_steps_sampled: 6797190
    num_steps_trained: 6797

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,740,381542,6797190,5.01261,17.41,-0.64,54.6522




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6807186
  custom_metrics: {}
  date: 2021-11-23_04-57-09
  done: false
  episode_len_mean: 54.53551912568306
  episode_media: {}
  episode_reward_max: 15.670000000000005
  episode_reward_mean: 5.336448087431698
  episode_reward_min: -0.6100000000000003
  episodes_this_iter: 183
  episodes_total: 132111
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0307764293678314
          entropy_coeff: 0.01
          kl: 0.01537408963745005
          policy_loss: -0.06886012300602455
          total_loss: 0.08366560777975021
          vf_explained_var: 0.9172341823577881
          vf_loss: 0.1378093964819531
    num_agent_steps_sampled: 6807186
    num_agent_steps_trained: 6807186
    num_steps_sampled: 6807186
    num_steps_trained: 6807

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,741,382102,6807186,5.33645,15.67,-0.61,54.5355


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6817182
  custom_metrics: {}
  date: 2021-11-23_05-05-57
  done: false
  episode_len_mean: 54.45652173913044
  episode_media: {}
  episode_reward_max: 13.670000000000005
  episode_reward_mean: 4.981956521739135
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 184
  episodes_total: 132295
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.044741813820529
          entropy_coeff: 0.01
          kl: 0.015570640996063317
          policy_loss: -0.07445101722993958
          total_loss: 0.08425022157514678
          vf_explained_var: 0.9334119558334351
          vf_loss: 0.14367678858364097
    num_agent_steps_sampled: 6817182
    num_agent_steps_trained: 6817182
    num_steps_sampled: 6817182
    num_steps_trained: 681

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,742,382630,6817182,4.98196,13.67,-0.52,54.4565




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6827178
  custom_metrics: {}
  date: 2021-11-23_05-15-00
  done: false
  episode_len_mean: 53.53763440860215
  episode_media: {}
  episode_reward_max: 15.620000000000006
  episode_reward_mean: 5.093172043010756
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 186
  episodes_total: 132481
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0472314174634865
          entropy_coeff: 0.01
          kl: 0.0158354089447733
          policy_loss: -0.06767176010021596
          total_loss: 0.12045108948476987
          vf_explained_var: 0.927758514881134
          vf_loss: 0.17252012183665333
    num_agent_steps_sampled: 6827178
    num_agent_steps_trained: 6827178
    num_steps_sampled: 6827178
    num_steps_trained: 68271

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,743,383174,6827178,5.09317,15.62,-0.52,53.5376




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6837174
  custom_metrics: {}
  date: 2021-11-23_05-24-05
  done: false
  episode_len_mean: 54.108695652173914
  episode_media: {}
  episode_reward_max: 17.54999999999998
  episode_reward_mean: 5.1992934782608735
  episode_reward_min: -0.5500000000000003
  episodes_this_iter: 184
  episodes_total: 132665
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.035397347867728
          entropy_coeff: 0.01
          kl: 0.014789729627831868
          policy_loss: -0.0647260576780811
          total_loss: 0.09719967837350617
          vf_explained_var: 0.9280728101730347
          vf_loss: 0.14858685517926742
    num_agent_steps_sampled: 6837174
    num_agent_steps_trained: 6837174
    num_steps_sampled: 6837174
    num_steps_trained: 683

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,744,383718,6837174,5.19929,17.55,-0.55,54.1087




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6847170
  custom_metrics: {}
  date: 2021-11-23_05-33-31
  done: false
  episode_len_mean: 54.043010752688176
  episode_media: {}
  episode_reward_max: 19.679999999999993
  episode_reward_mean: 5.0453763440860255
  episode_reward_min: -0.5800000000000003
  episodes_this_iter: 186
  episodes_total: 132851
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.026441018935667
          entropy_coeff: 0.01
          kl: 0.015567076972883167
          policy_loss: -0.06620313818214683
          total_loss: 0.0952431769063407
          vf_explained_var: 0.9200409054756165
          vf_loss: 0.1462469766456933
    num_agent_steps_sampled: 6847170
    num_agent_steps_trained: 6847170
    num_steps_sampled: 6847170
    num_steps_trained: 684

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,745,384285,6847170,5.04538,19.68,-0.58,54.043




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6857166
  custom_metrics: {}
  date: 2021-11-23_05-42-32
  done: false
  episode_len_mean: 53.79459459459459
  episode_media: {}
  episode_reward_max: 13.540000000000008
  episode_reward_mean: 5.036810810810816
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 185
  episodes_total: 133036
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0146496139376997
          entropy_coeff: 0.01
          kl: 0.014527092568062715
          policy_loss: -0.07271513909571473
          total_loss: 0.07712952008609954
          vf_explained_var: 0.9328609108924866
          vf_loss: 0.13689662147795847
    num_agent_steps_sampled: 6857166
    num_agent_steps_trained: 6857166
    num_steps_sampled: 6857166
    num_steps_trained: 68

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,746,384826,6857166,5.03681,13.54,-0.52,53.7946


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6867162
  custom_metrics: {}
  date: 2021-11-23_05-51-24
  done: false
  episode_len_mean: 53.52150537634409
  episode_media: {}
  episode_reward_max: 15.57000000000001
  episode_reward_mean: 5.357634408602156
  episode_reward_min: -0.5800000000000003
  episodes_this_iter: 186
  episodes_total: 133222
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.027343169560873
          entropy_coeff: 0.01
          kl: 0.015060225158410188
          policy_loss: -0.06749929746836562
          total_loss: 0.08491479258776977
          vf_explained_var: 0.9410747289657593
          vf_loss: 0.13837844472711375
    num_agent_steps_sampled: 6867162
    num_agent_steps_trained: 6867162
    num_steps_sampled: 6867162
    num_steps_trained: 6867

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,747,385358,6867162,5.35763,15.57,-0.58,53.5215




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6877158
  custom_metrics: {}
  date: 2021-11-23_06-00-41
  done: false
  episode_len_mean: 53.537234042553195
  episode_media: {}
  episode_reward_max: 15.610000000000007
  episode_reward_mean: 5.715159574468089
  episode_reward_min: -0.5600000000000003
  episodes_this_iter: 188
  episodes_total: 133410
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0119808103904187
          entropy_coeff: 0.01
          kl: 0.015326222684644047
          policy_loss: -0.06971112273267462
          total_loss: 0.09374917438212413
          vf_explained_var: 0.952505350112915
          vf_loss: 0.148665053291942
    num_agent_steps_sampled: 6877158
    num_agent_steps_trained: 6877158
    num_steps_sampled: 6877158
    num_steps_trained: 6877

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,748,385914,6877158,5.71516,15.61,-0.56,53.5372


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6887154
  custom_metrics: {}
  date: 2021-11-23_06-09-33
  done: false
  episode_len_mean: 53.015957446808514
  episode_media: {}
  episode_reward_max: 17.47000000000001
  episode_reward_mean: 5.263510638297876
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 188
  episodes_total: 133598
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.03168391685888
          entropy_coeff: 0.01
          kl: 0.014998768491509006
          policy_loss: -0.06302278299965154
          total_loss: 0.10838991434086721
          vf_explained_var: 0.9411893486976624
          vf_loss: 0.1575604655971586
    num_agent_steps_sampled: 6887154
    num_agent_steps_trained: 6887154
    num_steps_sampled: 6887154
    num_steps_trained: 68871

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,749,386447,6887154,5.26351,17.47,-0.52,53.016




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6897150
  custom_metrics: {}
  date: 2021-11-23_06-18-54
  done: false
  episode_len_mean: 52.63157894736842
  episode_media: {}
  episode_reward_max: 15.41000000000001
  episode_reward_mean: 5.059578947368426
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 190
  episodes_total: 133788
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.024175001890305
          entropy_coeff: 0.01
          kl: 0.013938901963043052
          policy_loss: -0.07282366376796792
          total_loss: 0.05547947264733676
          vf_explained_var: 0.9343377351760864
          vf_loss: 0.11679032490061349
    num_agent_steps_sampled: 6897150
    num_agent_steps_trained: 6897150
    num_steps_sampled: 6897150
    num_steps_trained: 6897

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,750,387007,6897150,5.05958,15.41,-0.53,52.6316




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6907146
  custom_metrics: {}
  date: 2021-11-23_06-27-57
  done: false
  episode_len_mean: 53.736559139784944
  episode_media: {}
  episode_reward_max: 17.560000000000002
  episode_reward_mean: 5.072043010752692
  episode_reward_min: -0.46000000000000024
  episodes_this_iter: 186
  episodes_total: 133974
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.009479223461036
          entropy_coeff: 0.01
          kl: 0.014470163000158227
          policy_loss: -0.07345431857976369
          total_loss: 0.06925961226494899
          vf_explained_var: 0.9357457160949707
          vf_loss: 0.12984388201482533
    num_agent_steps_sampled: 6907146
    num_agent_steps_trained: 6907146
    num_steps_sampled: 6907146
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,751,387550,6907146,5.07204,17.56,-0.46,53.7366




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6917142
  custom_metrics: {}
  date: 2021-11-23_06-37-03
  done: false
  episode_len_mean: 53.972972972972975
  episode_media: {}
  episode_reward_max: 15.530000000000005
  episode_reward_mean: 4.802594594594599
  episode_reward_min: -0.5800000000000003
  episodes_this_iter: 185
  episodes_total: 134159
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.035761503689739
          entropy_coeff: 0.01
          kl: 0.014702117840883032
          policy_loss: -0.07081096641388092
          total_loss: 0.07332579920985008
          vf_explained_var: 0.9260939359664917
          vf_loss: 0.13100111798540387
    num_agent_steps_sampled: 6917142
    num_agent_steps_trained: 6917142
    num_steps_sampled: 6917142
    num_steps_trained: 69

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,752,388096,6917142,4.80259,15.53,-0.58,53.973




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6927138
  custom_metrics: {}
  date: 2021-11-23_06-46-09
  done: false
  episode_len_mean: 53.22751322751323
  episode_media: {}
  episode_reward_max: 15.700000000000005
  episode_reward_mean: 5.132539682539686
  episode_reward_min: -0.5700000000000003
  episodes_this_iter: 189
  episodes_total: 134348
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0326269095202525
          entropy_coeff: 0.01
          kl: 0.014904736929426973
          policy_loss: -0.06722570178319301
          total_loss: 0.08555612250907746
          vf_explained_var: 0.9261638522148132
          vf_loss: 0.13915323877303176
    num_agent_steps_sampled: 6927138
    num_agent_steps_trained: 6927138
    num_steps_sampled: 6927138
    num_steps_trained: 69

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,753,388642,6927138,5.13254,15.7,-0.57,53.2275


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6937134
  custom_metrics: {}
  date: 2021-11-23_06-55-03
  done: false
  episode_len_mean: 53.81521739130435
  episode_media: {}
  episode_reward_max: 13.580000000000007
  episode_reward_mean: 5.39239130434783
  episode_reward_min: -0.4300000000000002
  episodes_this_iter: 184
  episodes_total: 134532
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0209974620476303
          entropy_coeff: 0.01
          kl: 0.015219747135038604
          policy_loss: -0.06961254443087243
          total_loss: 0.08296393807505231
          vf_explained_var: 0.9486626982688904
          vf_loss: 0.13811396937836037
    num_agent_steps_sampled: 6937134
    num_agent_steps_trained: 6937134
    num_steps_sampled: 6937134
    num_steps_trained: 693

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,754,389176,6937134,5.39239,13.58,-0.43,53.8152




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6947130
  custom_metrics: {}
  date: 2021-11-23_07-04-17
  done: false
  episode_len_mean: 53.75401069518717
  episode_media: {}
  episode_reward_max: 17.39999999999998
  episode_reward_mean: 5.271229946524068
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 187
  episodes_total: 134719
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.055611884139149
          entropy_coeff: 0.01
          kl: 0.014710583659719716
          policy_loss: -0.07079633016489231
          total_loss: 0.07406951683529096
          vf_explained_var: 0.9147394299507141
          vf_loss: 0.13190941582218815
    num_agent_steps_sampled: 6947130
    num_agent_steps_trained: 6947130
    num_steps_sampled: 6947130
    num_steps_trained: 6947

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,755,389730,6947130,5.27123,17.4,-0.53,53.754




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6957126
  custom_metrics: {}
  date: 2021-11-23_07-13-21
  done: false
  episode_len_mean: 53.994565217391305
  episode_media: {}
  episode_reward_max: 17.589999999999993
  episode_reward_mean: 5.3165217391304385
  episode_reward_min: -0.5900000000000003
  episodes_this_iter: 184
  episodes_total: 134903
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0286018418020992
          entropy_coeff: 0.01
          kl: 0.01472206880096235
          policy_loss: -0.069657380432412
          total_loss: 0.07538669138055294
          vf_explained_var: 0.9393524527549744
          vf_loss: 0.13179137568198981
    num_agent_steps_sampled: 6957126
    num_agent_steps_trained: 6957126
    num_steps_sampled: 6957126
    num_steps_trained: 695

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,756,390274,6957126,5.31652,17.59,-0.59,53.9946




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6967122
  custom_metrics: {}
  date: 2021-11-23_07-22-50
  done: false
  episode_len_mean: 54.193548387096776
  episode_media: {}
  episode_reward_max: 17.649999999999988
  episode_reward_mean: 5.640161290322584
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 186
  episodes_total: 135089
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0159239019734794
          entropy_coeff: 0.01
          kl: 0.015258035387613028
          policy_loss: -0.06767427859362156
          total_loss: 0.08320648589614864
          vf_explained_var: 0.9370319247245789
          vf_loss: 0.13628029052923663
    num_agent_steps_sampled: 6967122
    num_agent_steps_trained: 6967122
    num_steps_sampled: 6967122
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,757,390843,6967122,5.64016,17.65,-0.53,54.1935


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6977118
  custom_metrics: {}
  date: 2021-11-23_07-31-37
  done: false
  episode_len_mean: 54.52197802197802
  episode_media: {}
  episode_reward_max: 21.63999999999999
  episode_reward_mean: 5.41516483516484
  episode_reward_min: -0.6800000000000004
  episodes_this_iter: 182
  episodes_total: 135271
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0382408351543915
          entropy_coeff: 0.01
          kl: 0.01546451928468997
          policy_loss: -0.06356839062888198
          total_loss: 0.11855772782067044
          vf_explained_var: 0.9490157961845398
          vf_loss: 0.16727841757929857
    num_agent_steps_sampled: 6977118
    num_agent_steps_trained: 6977118
    num_steps_sampled: 6977118
    num_steps_trained: 69771

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,758,391370,6977118,5.41516,21.64,-0.68,54.522




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6987114
  custom_metrics: {}
  date: 2021-11-23_07-40-56
  done: false
  episode_len_mean: 52.94708994708995
  episode_media: {}
  episode_reward_max: 15.530000000000008
  episode_reward_mean: 5.442857142857147
  episode_reward_min: -0.6000000000000003
  episodes_this_iter: 189
  episodes_total: 135460
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.021507510506963
          entropy_coeff: 0.01
          kl: 0.014338902692810433
          policy_loss: -0.07030569352382701
          total_loss: 0.07489254383447891
          vf_explained_var: 0.9528221487998962
          vf_loss: 0.1327474977619138
    num_agent_steps_sampled: 6987114
    num_agent_steps_trained: 6987114
    num_steps_sampled: 6987114
    num_steps_trained: 6987

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,759,391929,6987114,5.44286,15.53,-0.6,52.9471




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 6997110
  custom_metrics: {}
  date: 2021-11-23_07-50-02
  done: false
  episode_len_mean: 53.657754010695186
  episode_media: {}
  episode_reward_max: 15.54000000000001
  episode_reward_mean: 5.234224598930485
  episode_reward_min: -0.5800000000000003
  episodes_this_iter: 187
  episodes_total: 135647
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.055350554013348
          entropy_coeff: 0.01
          kl: 0.015193386183505348
          policy_loss: -0.06992708396060462
          total_loss: 0.08135201183611052
          vf_explained_var: 0.9189697504043579
          vf_loss: 0.13722016661985212
    num_agent_steps_sampled: 6997110
    num_agent_steps_trained: 6997110
    num_steps_sampled: 6997110
    num_steps_trained: 699

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,760,392475,6997110,5.23422,15.54,-0.58,53.6578




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7007106
  custom_metrics: {}
  date: 2021-11-23_07-59-18
  done: false
  episode_len_mean: 53.65945945945946
  episode_media: {}
  episode_reward_max: 15.590000000000007
  episode_reward_mean: 5.626702702702706
  episode_reward_min: -0.45000000000000023
  episodes_this_iter: 185
  episodes_total: 135832
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0358319630345187
          entropy_coeff: 0.01
          kl: 0.01588318551351597
          policy_loss: -0.07004689872865932
          total_loss: 0.09429643487272144
          vf_explained_var: 0.9317222237586975
          vf_loss: 0.14851777021376694
    num_agent_steps_sampled: 7007106
    num_agent_steps_trained: 7007106
    num_steps_sampled: 7007106
    num_steps_trained: 70

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,761,393031,7007106,5.6267,15.59,-0.45,53.6595


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7017102
  custom_metrics: {}
  date: 2021-11-23_08-08-07
  done: false
  episode_len_mean: 55.03825136612022
  episode_media: {}
  episode_reward_max: 17.540000000000006
  episode_reward_mean: 5.37475409836066
  episode_reward_min: -0.5900000000000003
  episodes_this_iter: 183
  episodes_total: 136015
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0029604468958446
          entropy_coeff: 0.01
          kl: 0.014986980668952493
          policy_loss: -0.06940128390136756
          total_loss: 0.07691961262631469
          vf_explained_var: 0.9399338364601135
          vf_loss: 0.1322082842785274
    num_agent_steps_sampled: 7017102
    num_agent_steps_trained: 7017102
    num_steps_sampled: 7017102
    num_steps_trained: 7017

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,762,393560,7017102,5.37475,17.54,-0.59,55.0383




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7027098
  custom_metrics: {}
  date: 2021-11-23_08-17-11
  done: false
  episode_len_mean: 53.49732620320856
  episode_media: {}
  episode_reward_max: 19.539999999999978
  episode_reward_mean: 5.083475935828881
  episode_reward_min: -0.5600000000000003
  episodes_this_iter: 187
  episodes_total: 136202
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.024127500722686
          entropy_coeff: 0.01
          kl: 0.014771955788956743
          policy_loss: -0.06877148604303712
          total_loss: 0.08040922839555865
          vf_explained_var: 0.9332848787307739
          vf_loss: 0.13576962636291875
    num_agent_steps_sampled: 7027098
    num_agent_steps_trained: 7027098
    num_steps_sampled: 7027098
    num_steps_trained: 702

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,763,394104,7027098,5.08348,19.54,-0.56,53.4973


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7037094
  custom_metrics: {}
  date: 2021-11-23_08-26-02
  done: false
  episode_len_mean: 54.016216216216215
  episode_media: {}
  episode_reward_max: 15.540000000000008
  episode_reward_mean: 5.126864864864869
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 185
  episodes_total: 136387
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.034149568554867
          entropy_coeff: 0.01
          kl: 0.014315594572422825
          policy_loss: -0.07184492327269912
          total_loss: 0.0696627187934847
          vf_explained_var: 0.9412981271743774
          vf_loss: 0.12923642232430627
    num_agent_steps_sampled: 7037094
    num_agent_steps_trained: 7037094
    num_steps_sampled: 7037094
    num_steps_trained: 703

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,764,394635,7037094,5.12686,15.54,-0.52,54.0162




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7047090
  custom_metrics: {}
  date: 2021-11-23_08-35-13
  done: false
  episode_len_mean: 52.3
  episode_media: {}
  episode_reward_max: 19.51999999999996
  episode_reward_mean: 5.51268421052632
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 190
  episodes_total: 136577
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0298245137714477
          entropy_coeff: 0.01
          kl: 0.014517568991640261
          policy_loss: -0.05879127342378929
          total_loss: 0.09663333651312413
          vf_explained_var: 0.9507020115852356
          vf_loss: 0.14265001771268415
    num_agent_steps_sampled: 7047090
    num_agent_steps_trained: 7047090
    num_steps_sampled: 7047090
    num_steps_trained: 7047090
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,765,395186,7047090,5.51268,19.52,-0.48,52.3




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7057086
  custom_metrics: {}
  date: 2021-11-23_08-44-24
  done: false
  episode_len_mean: 54.18478260869565
  episode_media: {}
  episode_reward_max: 17.55
  episode_reward_mean: 5.130326086956527
  episode_reward_min: -0.7500000000000004
  episodes_this_iter: 184
  episodes_total: 136761
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0341447005070834
          entropy_coeff: 0.01
          kl: 0.015216690175422678
          policy_loss: -0.06491168148426091
          total_loss: 0.09273843396231862
          vf_explained_var: 0.9394160509109497
          vf_loss: 0.143326039348325
    num_agent_steps_sampled: 7057086
    num_agent_steps_trained: 7057086
    num_steps_sampled: 7057086
    num_steps_trained: 7057086
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,766,395737,7057086,5.13033,17.55,-0.75,54.1848




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7067082
  custom_metrics: {}
  date: 2021-11-23_08-53-32
  done: false
  episode_len_mean: 54.344086021505376
  episode_media: {}
  episode_reward_max: 17.55
  episode_reward_mean: 5.29639784946237
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 186
  episodes_total: 136947
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0158178704570098
          entropy_coeff: 0.01
          kl: 0.015102094182316133
          policy_loss: -0.07063186511305214
          total_loss: 0.0906817807361733
          vf_explained_var: 0.9375714063644409
          vf_loss: 0.14706736526650238
    num_agent_steps_sampled: 7067082
    num_agent_steps_trained: 7067082
    num_steps_sampled: 7067082
    num_steps_trained: 7067082
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,767,396285,7067082,5.2964,17.55,-0.48,54.3441




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7077078
  custom_metrics: {}
  date: 2021-11-23_09-02-53
  done: false
  episode_len_mean: 54.55494505494506
  episode_media: {}
  episode_reward_max: 15.580000000000007
  episode_reward_mean: 5.209120879120883
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 182
  episodes_total: 137129
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0166550524981623
          entropy_coeff: 0.01
          kl: 0.015205148044033867
          policy_loss: -0.0658806287960106
          total_loss: 0.09218980893345134
          vf_explained_var: 0.9390846490859985
          vf_loss: 0.14359775864178817
    num_agent_steps_sampled: 7077078
    num_agent_steps_trained: 7077078
    num_steps_sampled: 7077078
    num_steps_trained: 707

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,768,396846,7077078,5.20912,15.58,-0.51,54.5549


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7087074
  custom_metrics: {}
  date: 2021-11-23_09-11-41
  done: false
  episode_len_mean: 55.29834254143646
  episode_media: {}
  episode_reward_max: 17.619999999999983
  episode_reward_mean: 5.32519337016575
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 181
  episodes_total: 137310
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0076678069959204
          entropy_coeff: 0.01
          kl: 0.015833225450816113
          policy_loss: -0.07048433666086457
          total_loss: 0.09193983025722982
          vf_explained_var: 0.9279422163963318
          vf_loss: 0.14643077772469765
    num_agent_steps_sampled: 7087074
    num_agent_steps_trained: 7087074
    num_steps_sampled: 7087074
    num_steps_trained: 708

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,769,397374,7087074,5.32519,17.62,-0.51,55.2983




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7097070
  custom_metrics: {}
  date: 2021-11-23_09-20-45
  done: false
  episode_len_mean: 54.9010989010989
  episode_media: {}
  episode_reward_max: 15.660000000000005
  episode_reward_mean: 5.434670329670334
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 182
  episodes_total: 137492
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0201978806989738
          entropy_coeff: 0.01
          kl: 0.015255571802873678
          policy_loss: -0.06634524008869305
          total_loss: 0.0990682159392886
          vf_explained_var: 0.9264796376228333
          vf_loss: 0.15086133606445762
    num_agent_steps_sampled: 7097070
    num_agent_steps_trained: 7097070
    num_steps_sampled: 7097070
    num_steps_trained: 7097

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,770,397918,7097070,5.43467,15.66,-0.52,54.9011




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7107066
  custom_metrics: {}
  date: 2021-11-23_09-29-59
  done: false
  episode_len_mean: 54.45108695652174
  episode_media: {}
  episode_reward_max: 15.550000000000008
  episode_reward_mean: 4.918260869565222
  episode_reward_min: -0.5800000000000003
  episodes_this_iter: 184
  episodes_total: 137676
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0599623867067467
          entropy_coeff: 0.01
          kl: 0.014530177176119185
          policy_loss: -0.07016032147135212
          total_loss: 0.0790992136930123
          vf_explained_var: 0.9310746192932129
          vf_loss: 0.13675759836552612
    num_agent_steps_sampled: 7107066
    num_agent_steps_trained: 7107066
    num_steps_sampled: 7107066
    num_steps_trained: 710

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,771,398471,7107066,4.91826,15.55,-0.58,54.4511




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7117062
  custom_metrics: {}
  date: 2021-11-23_09-39-04
  done: false
  episode_len_mean: 55.37777777777778
  episode_media: {}
  episode_reward_max: 19.519999999999985
  episode_reward_mean: 5.008777777777782
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 180
  episodes_total: 137856
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.047968835763663
          entropy_coeff: 0.01
          kl: 0.015321427289116138
          policy_loss: -0.06604383529673974
          total_loss: 0.09210272446607815
          vf_explained_var: 0.9323095083236694
          vf_loss: 0.1437221212280787
    num_agent_steps_sampled: 7117062
    num_agent_steps_trained: 7117062
    num_steps_sampled: 7117062
    num_steps_trained: 7117

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,772,399017,7117062,5.00878,19.52,-0.54,55.3778


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7127058
  custom_metrics: {}
  date: 2021-11-23_09-47-53
  done: false
  episode_len_mean: 55.90449438202247
  episode_media: {}
  episode_reward_max: 17.449999999999992
  episode_reward_mean: 5.267808988764049
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 178
  episodes_total: 138034
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0169567248428684
          entropy_coeff: 0.01
          kl: 0.015054411429003874
          policy_loss: -0.06713731741409258
          total_loss: 0.07960977733656989
          vf_explained_var: 0.9392080307006836
          vf_loss: 0.1326208289096461
    num_agent_steps_sampled: 7127058
    num_agent_steps_trained: 7127058
    num_steps_sampled: 7127058
    num_steps_trained: 71

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,773,399545,7127058,5.26781,17.45,-0.49,55.9045




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7137054
  custom_metrics: {}
  date: 2021-11-23_09-57-36
  done: false
  episode_len_mean: 54.78804347826087
  episode_media: {}
  episode_reward_max: 17.59
  episode_reward_mean: 5.061304347826091
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 184
  episodes_total: 138218
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.044940500015236
          entropy_coeff: 0.01
          kl: 0.014638468206555396
          policy_loss: -0.07234340859192515
          total_loss: 0.07383033950652744
          vf_explained_var: 0.9340908527374268
          vf_loss: 0.13327489074256016
    num_agent_steps_sampled: 7137054
    num_agent_steps_trained: 7137054
    num_steps_sampled: 7137054
    num_steps_trained: 7137054
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,774,400129,7137054,5.0613,17.59,-0.49,54.788




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7147050
  custom_metrics: {}
  date: 2021-11-23_10-06-38
  done: false
  episode_len_mean: 54.47540983606557
  episode_media: {}
  episode_reward_max: 15.610000000000007
  episode_reward_mean: 4.849180327868857
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 183
  episodes_total: 138401
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.023738563156511
          entropy_coeff: 0.01
          kl: 0.014753456056381085
          policy_loss: -0.06971381918984519
          total_loss: 0.08371310465395601
          vf_explained_var: 0.9114718437194824
          vf_loss: 0.1400540903718379
    num_agent_steps_sampled: 7147050
    num_agent_steps_trained: 7147050
    num_steps_sampled: 7147050
    num_steps_trained: 7147

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,775,400670,7147050,4.84918,15.61,-0.54,54.4754


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7157046
  custom_metrics: {}
  date: 2021-11-23_10-15-26
  done: false
  episode_len_mean: 54.76923076923077
  episode_media: {}
  episode_reward_max: 17.600000000000005
  episode_reward_mean: 5.408241758241762
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 182
  episodes_total: 138583
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.022441729604001
          entropy_coeff: 0.01
          kl: 0.015782356742937178
          policy_loss: -0.06738052447561599
          total_loss: 0.0987811514602459
          vf_explained_var: 0.9320783019065857
          vf_loss: 0.15043191202321504
    num_agent_steps_sampled: 7157046
    num_agent_steps_trained: 7157046
    num_steps_sampled: 7157046
    num_steps_trained: 715

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,776,401198,7157046,5.40824,17.6,-0.48,54.7692




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7167042
  custom_metrics: {}
  date: 2021-11-23_10-24-31
  done: false
  episode_len_mean: 55.23076923076923
  episode_media: {}
  episode_reward_max: 17.610000000000003
  episode_reward_mean: 5.498406593406598
  episode_reward_min: -0.6300000000000003
  episodes_this_iter: 182
  episodes_total: 138765
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0482899389592517
          entropy_coeff: 0.01
          kl: 0.01551550278819391
          policy_loss: -0.06443905025163883
          total_loss: 0.10034746121014233
          vf_explained_var: 0.9264808297157288
          vf_loss: 0.14992315406185466
    num_agent_steps_sampled: 7167042
    num_agent_steps_trained: 7167042
    num_steps_sampled: 7167042
    num_steps_trained: 716

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,777,401743,7167042,5.49841,17.61,-0.63,55.2308




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7177038
  custom_metrics: {}
  date: 2021-11-23_10-33-33
  done: false
  episode_len_mean: 54.77049180327869
  episode_media: {}
  episode_reward_max: 15.560000000000008
  episode_reward_mean: 5.210000000000004
  episode_reward_min: -0.7100000000000004
  episodes_this_iter: 183
  episodes_total: 138948
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0373984205674933
          entropy_coeff: 0.01
          kl: 0.01564297747201977
          policy_loss: -0.0697254446203293
          total_loss: 0.0927282939428244
          vf_explained_var: 0.9185498952865601
          vf_loss: 0.147191063556016
    num_agent_steps_sampled: 7177038
    num_agent_steps_trained: 7177038
    num_steps_sampled: 7177038
    num_steps_trained: 7177038

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,778,402285,7177038,5.21,15.56,-0.71,54.7705




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7187034
  custom_metrics: {}
  date: 2021-11-23_10-42-33
  done: false
  episode_len_mean: 54.77900552486188
  episode_media: {}
  episode_reward_max: 15.640000000000006
  episode_reward_mean: 5.7155248618784595
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 181
  episodes_total: 139129
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.032048925769377
          entropy_coeff: 0.01
          kl: 0.01473605423269799
          policy_loss: -0.07145907934211582
          total_loss: 0.08164350753606822
          vf_explained_var: 0.9483192563056946
          vf_loss: 0.13985250198523547
    num_agent_steps_sampled: 7187034
    num_agent_steps_trained: 7187034
    num_steps_sampled: 7187034
    num_steps_trained: 718

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,779,402826,7187034,5.71552,15.64,-0.5,54.779




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7197030
  custom_metrics: {}
  date: 2021-11-23_10-51-35
  done: false
  episode_len_mean: 54.475675675675674
  episode_media: {}
  episode_reward_max: 17.679999999999986
  episode_reward_mean: 4.784864864864868
  episode_reward_min: -0.5800000000000003
  episodes_this_iter: 185
  episodes_total: 139314
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0553309253899448
          entropy_coeff: 0.01
          kl: 0.015230639122248956
          policy_loss: -0.07313257483246048
          total_loss: 0.07294117759823615
          vf_explained_var: 0.9369490742683411
          vf_loss: 0.13192976086856759
    num_agent_steps_sampled: 7197030
    num_agent_steps_trained: 7197030
    num_steps_sampled: 7197030
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,780,403368,7197030,4.78486,17.68,-0.58,54.4757




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7207026
  custom_metrics: {}
  date: 2021-11-23_11-00-37
  done: false
  episode_len_mean: 55.105555555555554
  episode_media: {}
  episode_reward_max: 15.670000000000005
  episode_reward_mean: 5.613222222222226
  episode_reward_min: -0.4000000000000002
  episodes_this_iter: 180
  episodes_total: 139494
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0023205682455774
          entropy_coeff: 0.01
          kl: 0.014959447625339338
          policy_loss: -0.07293274688853302
          total_loss: 0.07164614303774783
          vf_explained_var: 0.9489567875862122
          vf_loss: 0.1305226025143526
    num_agent_steps_sampled: 7207026
    num_agent_steps_trained: 7207026
    num_steps_sampled: 7207026
    num_steps_trained: 72

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,781,403910,7207026,5.61322,15.67,-0.4,55.1056




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7217022
  custom_metrics: {}
  date: 2021-11-23_11-09-53
  done: false
  episode_len_mean: 53.6524064171123
  episode_media: {}
  episode_reward_max: 15.590000000000007
  episode_reward_mean: 5.388128342245993
  episode_reward_min: -0.5600000000000003
  episodes_this_iter: 187
  episodes_total: 139681
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.017270185597929
          entropy_coeff: 0.01
          kl: 0.016180951210254733
          policy_loss: -0.06849934734079068
          total_loss: 0.11229889093729692
          vf_explained_var: 0.9221264123916626
          vf_loss: 0.16410870966408028
    num_agent_steps_sampled: 7217022
    num_agent_steps_trained: 7217022
    num_steps_sampled: 7217022
    num_steps_trained: 7217

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,782,404465,7217022,5.38813,15.59,-0.56,53.6524


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7227018
  custom_metrics: {}
  date: 2021-11-23_11-18-46
  done: false
  episode_len_mean: 53.74594594594595
  episode_media: {}
  episode_reward_max: 21.639999999999986
  episode_reward_mean: 5.342162162162166
  episode_reward_min: -0.45000000000000023
  episodes_this_iter: 185
  episodes_total: 139866
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.012112274538561
          entropy_coeff: 0.01
          kl: 0.014643641174805978
          policy_loss: -0.06320024849921198
          total_loss: 0.09042428908847562
          vf_explained_var: 0.9470794200897217
          vf_loss: 0.14038561487619028
    num_agent_steps_sampled: 7227018
    num_agent_steps_trained: 7227018
    num_steps_sampled: 7227018
    num_steps_trained: 72

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,783,404998,7227018,5.34216,21.64,-0.45,53.7459


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7237014
  custom_metrics: {}
  date: 2021-11-23_11-27-40
  done: false
  episode_len_mean: 54.35326086956522
  episode_media: {}
  episode_reward_max: 17.51000000000001
  episode_reward_mean: 5.336358695652178
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 184
  episodes_total: 140050
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.038503879452326
          entropy_coeff: 0.01
          kl: 0.014432517779663914
          policy_loss: -0.06599725252514789
          total_loss: 0.0805263343046216
          vf_explained_var: 0.9256529808044434
          vf_loss: 0.13402954380600207
    num_agent_steps_sampled: 7237014
    num_agent_steps_trained: 7237014
    num_steps_sampled: 7237014
    num_steps_trained: 72370

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,784,405532,7237014,5.33636,17.51,-0.53,54.3533




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7247010
  custom_metrics: {}
  date: 2021-11-23_11-36-59
  done: false
  episode_len_mean: 53.56149732620321
  episode_media: {}
  episode_reward_max: 15.690000000000005
  episode_reward_mean: 4.92540106951872
  episode_reward_min: -0.5600000000000003
  episodes_this_iter: 187
  episodes_total: 140237
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0533298250183045
          entropy_coeff: 0.01
          kl: 0.014667786531159296
          policy_loss: -0.07103091138234746
          total_loss: 0.06367497622038691
          vf_explained_var: 0.9477348923683167
          vf_loss: 0.1218241335884911
    num_agent_steps_sampled: 7247010
    num_agent_steps_trained: 7247010
    num_steps_sampled: 7247010
    num_steps_trained: 7247

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,785,406092,7247010,4.9254,15.69,-0.56,53.5615




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7257006
  custom_metrics: {}
  date: 2021-11-23_11-46-17
  done: false
  episode_len_mean: 53.38502673796791
  episode_media: {}
  episode_reward_max: 15.590000000000007
  episode_reward_mean: 5.112673796791448
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 187
  episodes_total: 140424
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0484687426722195
          entropy_coeff: 0.01
          kl: 0.014630051739284824
          policy_loss: -0.0692955469365473
          total_loss: 0.0725918811270601
          vf_explained_var: 0.9376406073570251
          vf_loss: 0.1290430268265277
    num_agent_steps_sampled: 7257006
    num_agent_steps_trained: 7257006
    num_steps_sampled: 7257006
    num_steps_trained: 72570

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,786,406649,7257006,5.11267,15.59,-0.51,53.385


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7267002
  custom_metrics: {}
  date: 2021-11-23_11-55-09
  done: false
  episode_len_mean: 54.11290322580645
  episode_media: {}
  episode_reward_max: 15.570000000000007
  episode_reward_mean: 5.339838709677424
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 186
  episodes_total: 140610
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0288478426904564
          entropy_coeff: 0.01
          kl: 0.015662677595017625
          policy_loss: -0.06472535785367975
          total_loss: 0.10243502464328344
          vf_explained_var: 0.9362277388572693
          vf_loss: 0.15176732254646583
    num_agent_steps_sampled: 7267002
    num_agent_steps_trained: 7267002
    num_steps_sampled: 7267002
    num_steps_trained: 72

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,787,407181,7267002,5.33984,15.57,-0.53,54.1129




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7276998
  custom_metrics: {}
  date: 2021-11-23_12-04-14
  done: false
  episode_len_mean: 53.96756756756757
  episode_media: {}
  episode_reward_max: 17.470000000000006
  episode_reward_mean: 5.6224864864864905
  episode_reward_min: -0.4400000000000002
  episodes_this_iter: 185
  episodes_total: 140795
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.008466033236569
          entropy_coeff: 0.01
          kl: 0.014587112013510206
          policy_loss: -0.06925712056663359
          total_loss: 0.07284083062233884
          vf_explained_var: 0.9476286172866821
          vf_loss: 0.12895134626074234
    num_agent_steps_sampled: 7276998
    num_agent_steps_trained: 7276998
    num_steps_sampled: 7276998
    num_steps_trained: 72

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,788,407726,7276998,5.62249,17.47,-0.44,53.9676




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7286994
  custom_metrics: {}
  date: 2021-11-23_12-13-42
  done: false
  episode_len_mean: 54.52459016393443
  episode_media: {}
  episode_reward_max: 15.46000000000001
  episode_reward_mean: 5.2193442622950865
  episode_reward_min: -0.7400000000000004
  episodes_this_iter: 183
  episodes_total: 140978
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.042603491779312
          entropy_coeff: 0.01
          kl: 0.015315476682477923
          policy_loss: -0.07101489298322722
          total_loss: 0.08940161947966498
          vf_explained_var: 0.9256529211997986
          vf_loss: 0.1459519766145816
    num_agent_steps_sampled: 7286994
    num_agent_steps_trained: 7286994
    num_steps_sampled: 7286994
    num_steps_trained: 7286

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,789,408294,7286994,5.21934,15.46,-0.74,54.5246




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7296990
  custom_metrics: {}
  date: 2021-11-23_12-23-03
  done: false
  episode_len_mean: 53.48663101604278
  episode_media: {}
  episode_reward_max: 15.650000000000006
  episode_reward_mean: 5.29352941176471
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 187
  episodes_total: 141165
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.022129283731721
          entropy_coeff: 0.01
          kl: 0.015041844766807424
          policy_loss: -0.067885676525662
          total_loss: 0.07751580776254105
          vf_explained_var: 0.9526882767677307
          vf_loss: 0.13135557336186668
    num_agent_steps_sampled: 7296990
    num_agent_steps_trained: 7296990
    num_steps_sampled: 7296990
    num_steps_trained: 72969

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,790,408855,7296990,5.29353,15.65,-0.49,53.4866




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7306986
  custom_metrics: {}
  date: 2021-11-23_12-32-10
  done: false
  episode_len_mean: 53.82795698924731
  episode_media: {}
  episode_reward_max: 19.479999999999983
  episode_reward_mean: 5.718064516129036
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 186
  episodes_total: 141351
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.978862895568212
          entropy_coeff: 0.01
          kl: 0.01456302346013322
          policy_loss: -0.06957649425826695
          total_loss: 0.07676710866666304
          vf_explained_var: 0.9490752220153809
          vf_loss: 0.132955842855364
    num_agent_steps_sampled: 7306986
    num_agent_steps_trained: 7306986
    num_steps_sampled: 7306986
    num_steps_trained: 730698

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,791,409402,7306986,5.71806,19.48,-0.51,53.828




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7316982
  custom_metrics: {}
  date: 2021-11-23_12-41-15
  done: false
  episode_len_mean: 53.76216216216216
  episode_media: {}
  episode_reward_max: 17.589999999999993
  episode_reward_mean: 5.690108108108113
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 185
  episodes_total: 141536
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.990168864348806
          entropy_coeff: 0.01
          kl: 0.015212039289364302
          policy_loss: -0.06295175605328077
          total_loss: 0.11080886923892493
          vf_explained_var: 0.9354441165924072
          vf_loss: 0.1590073846670496
    num_agent_steps_sampled: 7316982
    num_agent_steps_trained: 7316982
    num_steps_sampled: 7316982
    num_steps_trained: 7316

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,792,409947,7316982,5.69011,17.59,-0.5,53.7622




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7326978
  custom_metrics: {}
  date: 2021-11-23_12-50-34
  done: false
  episode_len_mean: 53.36363636363637
  episode_media: {}
  episode_reward_max: 13.590000000000007
  episode_reward_mean: 4.864385026737972
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 187
  episodes_total: 141723
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.049448858518677
          entropy_coeff: 0.01
          kl: 0.01490357938279307
          policy_loss: -0.06875898259182343
          total_loss: 0.07663995508176534
          vf_explained_var: 0.9335517883300781
          vf_loss: 0.13194120824439862
    num_agent_steps_sampled: 7326978
    num_agent_steps_trained: 7326978
    num_steps_sampled: 7326978
    num_steps_trained: 7326

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,793,410506,7326978,4.86439,13.59,-0.53,53.3636




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7336974
  custom_metrics: {}
  date: 2021-11-23_12-59-40
  done: false
  episode_len_mean: 53.68279569892473
  episode_media: {}
  episode_reward_max: 13.700000000000005
  episode_reward_mean: 5.076021505376349
  episode_reward_min: -0.5700000000000003
  episodes_this_iter: 186
  episodes_total: 141909
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0756087485566197
          entropy_coeff: 0.01
          kl: 0.014545203193747611
          policy_loss: -0.0691787973437159
          total_loss: 0.07664002871591231
          vf_explained_var: 0.9164425134658813
          vf_loss: 0.1334391208730234
    num_agent_steps_sampled: 7336974
    num_agent_steps_trained: 7336974
    num_steps_sampled: 7336974
    num_steps_trained: 7336

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,794,411052,7336974,5.07602,13.7,-0.57,53.6828




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7346970
  custom_metrics: {}
  date: 2021-11-23_13-08-58
  done: false
  episode_len_mean: 54.36413043478261
  episode_media: {}
  episode_reward_max: 15.550000000000008
  episode_reward_mean: 4.9816304347826135
  episode_reward_min: -0.6900000000000004
  episodes_this_iter: 184
  episodes_total: 142093
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.032797088871998
          entropy_coeff: 0.01
          kl: 0.014155335037117741
          policy_loss: -0.07106163044491734
          total_loss: 0.06083297734102339
          vf_explained_var: 0.9381951689720154
          vf_loss: 0.11997495567010917
    num_agent_steps_sampled: 7346970
    num_agent_steps_trained: 7346970
    num_steps_sampled: 7346970
    num_steps_trained: 73

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,795,411610,7346970,4.98163,15.55,-0.69,54.3641




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7356966
  custom_metrics: {}
  date: 2021-11-23_13-18-01
  done: false
  episode_len_mean: 54.78021978021978
  episode_media: {}
  episode_reward_max: 13.610000000000005
  episode_reward_mean: 5.4126923076923115
  episode_reward_min: -0.5900000000000003
  episodes_this_iter: 182
  episodes_total: 142275
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0451969888555
          entropy_coeff: 0.01
          kl: 0.013469428634949467
          policy_loss: -0.06587870057368542
          total_loss: 0.056029942112535897
          vf_explained_var: 0.9569765329360962
          vf_loss: 0.11167556951448965
    num_agent_steps_sampled: 7356966
    num_agent_steps_trained: 7356966
    num_steps_sampled: 7356966
    num_steps_trained: 735

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,796,412152,7356966,5.41269,13.61,-0.59,54.7802


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7366962
  custom_metrics: {}
  date: 2021-11-23_13-26-50
  done: false
  episode_len_mean: 55.54945054945055
  episode_media: {}
  episode_reward_max: 15.660000000000005
  episode_reward_mean: 5.318846153846159
  episode_reward_min: -0.6300000000000003
  episodes_this_iter: 182
  episodes_total: 142457
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.059312908764345
          entropy_coeff: 0.01
          kl: 0.015023128891271149
          policy_loss: -0.07088506489465701
          total_loss: 0.0796573598228364
          vf_explained_var: 0.9314090609550476
          vf_loss: 0.13691098744762187
    num_agent_steps_sampled: 7366962
    num_agent_steps_trained: 7366962
    num_steps_sampled: 7366962
    num_steps_trained: 7366

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,797,412682,7366962,5.31885,15.66,-0.63,55.5495


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7376958
  custom_metrics: {}
  date: 2021-11-23_13-35-42
  done: false
  episode_len_mean: 55.12222222222222
  episode_media: {}
  episode_reward_max: 15.600000000000007
  episode_reward_mean: 5.340388888888894
  episode_reward_min: -0.5500000000000003
  episodes_this_iter: 180
  episodes_total: 142637
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0494095906675103
          entropy_coeff: 0.01
          kl: 0.01465198344780278
          policy_loss: -0.06945584055705052
          total_loss: 0.07088353144131818
          vf_explained_var: 0.9485306739807129
          vf_loss: 0.12745441602380295
    num_agent_steps_sampled: 7376958
    num_agent_steps_trained: 7376958
    num_steps_sampled: 7376958
    num_steps_trained: 737

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,798,413214,7376958,5.34039,15.6,-0.55,55.1222




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7386954
  custom_metrics: {}
  date: 2021-11-23_13-44-57
  done: false
  episode_len_mean: 55.082417582417584
  episode_media: {}
  episode_reward_max: 17.620000000000008
  episode_reward_mean: 4.968241758241762
  episode_reward_min: -0.6300000000000003
  episodes_this_iter: 182
  episodes_total: 142819
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.07389971503771
          entropy_coeff: 0.01
          kl: 0.014751172455213847
          policy_loss: -0.06945301965655065
          total_loss: 0.08292999214511812
          vf_explained_var: 0.9329780340194702
          vf_loss: 0.139516993113382
    num_agent_steps_sampled: 7386954
    num_agent_steps_trained: 7386954
    num_steps_sampled: 7386954
    num_steps_trained: 73869

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,799,413769,7386954,4.96824,17.62,-0.63,55.0824




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7396950
  custom_metrics: {}
  date: 2021-11-23_13-54-01
  done: false
  episode_len_mean: 54.69945355191257
  episode_media: {}
  episode_reward_max: 19.499999999999986
  episode_reward_mean: 5.4486338797814255
  episode_reward_min: -0.6000000000000003
  episodes_this_iter: 183
  episodes_total: 143002
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0568876986762126
          entropy_coeff: 0.01
          kl: 0.014439168145623333
          policy_loss: -0.07159508274526015
          total_loss: 0.07582535598270784
          vf_explained_var: 0.9182358384132385
          vf_loss: 0.13509508493939887
    num_agent_steps_sampled: 7396950
    num_agent_steps_trained: 7396950
    num_steps_sampled: 7396950
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,800,414313,7396950,5.44863,19.5,-0.6,54.6995


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7406946
  custom_metrics: {}
  date: 2021-11-23_14-02-50
  done: false
  episode_len_mean: 55.22222222222222
  episode_media: {}
  episode_reward_max: 17.45
  episode_reward_mean: 5.350444444444449
  episode_reward_min: -0.5600000000000003
  episodes_this_iter: 180
  episodes_total: 143182
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.045680181041779
          entropy_coeff: 0.01
          kl: 0.014513240071128362
          policy_loss: -0.06847561663476319
          total_loss: 0.08223113886300393
          vf_explained_var: 0.9119109511375427
          vf_loss: 0.1381005806759576
    num_agent_steps_sampled: 7406946
    num_agent_steps_trained: 7406946
    num_steps_sampled: 7406946
    num_steps_trained: 7406946
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,801,414841,7406946,5.35044,17.45,-0.56,55.2222




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7416942
  custom_metrics: {}
  date: 2021-11-23_14-11-53
  done: false
  episode_len_mean: 55.60220994475138
  episode_media: {}
  episode_reward_max: 15.46000000000001
  episode_reward_mean: 4.919005524861883
  episode_reward_min: -0.6200000000000003
  episodes_this_iter: 181
  episodes_total: 143363
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.052569691806912
          entropy_coeff: 0.01
          kl: 0.014174237914515116
          policy_loss: -0.07045870809190062
          total_loss: 0.0747567958255249
          vf_explained_var: 0.9243689775466919
          vf_loss: 0.13345051483058532
    num_agent_steps_sampled: 7416942
    num_agent_steps_trained: 7416942
    num_steps_sampled: 7416942
    num_steps_trained: 74169

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,802,415385,7416942,4.91901,15.46,-0.62,55.6022




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7426938
  custom_metrics: {}
  date: 2021-11-23_14-21-08
  done: false
  episode_len_mean: 54.666666666666664
  episode_media: {}
  episode_reward_max: 17.47999999999999
  episode_reward_mean: 5.429562841530059
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 183
  episodes_total: 143546
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.041600982850814
          entropy_coeff: 0.01
          kl: 0.014350078644763883
          policy_loss: -0.0694951515914905
          total_loss: 0.07435989650156914
          vf_explained_var: 0.9470363855361938
          vf_loss: 0.13157978323680988
    num_agent_steps_sampled: 7426938
    num_agent_steps_trained: 7426938
    num_steps_sampled: 7426938
    num_steps_trained: 742

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,803,415939,7426938,5.42956,17.48,-0.48,54.6667




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7436934
  custom_metrics: {}
  date: 2021-11-23_14-30-07
  done: false
  episode_len_mean: 55.79329608938548
  episode_media: {}
  episode_reward_max: 15.580000000000007
  episode_reward_mean: 5.241787709497212
  episode_reward_min: -0.5500000000000003
  episodes_this_iter: 179
  episodes_total: 143725
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.044591649648655
          entropy_coeff: 0.01
          kl: 0.015288456797380669
          policy_loss: -0.06993143669715063
          total_loss: 0.08222775820562762
          vf_explained_var: 0.9457097053527832
          vf_loss: 0.13777609435910637
    num_agent_steps_sampled: 7436934
    num_agent_steps_trained: 7436934
    num_steps_sampled: 7436934
    num_steps_trained: 743

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,804,416479,7436934,5.24179,15.58,-0.55,55.7933




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7446930
  custom_metrics: {}
  date: 2021-11-23_14-39-11
  done: false
  episode_len_mean: 55.90395480225989
  episode_media: {}
  episode_reward_max: 15.500000000000009
  episode_reward_mean: 5.5018079096045245
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 177
  episodes_total: 143902
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.025210386801915
          entropy_coeff: 0.01
          kl: 0.014630977434046183
          policy_loss: -0.0702582816681559
          total_loss: 0.0701446912274227
          vf_explained_var: 0.937987208366394
          vf_loss: 0.12732388090106647
    num_agent_steps_sampled: 7446930
    num_agent_steps_trained: 7446930
    num_steps_sampled: 7446930
    num_steps_trained: 74469

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,805,417023,7446930,5.50181,15.5,-0.54,55.904




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7456926
  custom_metrics: {}
  date: 2021-11-23_14-48-13
  done: false
  episode_len_mean: 55.31868131868132
  episode_media: {}
  episode_reward_max: 13.600000000000007
  episode_reward_mean: 5.066428571428576
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 182
  episodes_total: 144084
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0484583988965275
          entropy_coeff: 0.01
          kl: 0.014423309675705025
          policy_loss: -0.0716663234644596
          total_loss: 0.06309853952108539
          vf_explained_var: 0.9484769105911255
          vf_loss: 0.12239134388005204
    num_agent_steps_sampled: 7456926
    num_agent_steps_trained: 7456926
    num_steps_sampled: 7456926
    num_steps_trained: 745

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,806,417564,7456926,5.06643,13.6,-0.51,55.3187




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7466922
  custom_metrics: {}
  date: 2021-11-23_14-57-41
  done: false
  episode_len_mean: 54.8021978021978
  episode_media: {}
  episode_reward_max: 17.540000000000006
  episode_reward_mean: 4.719340659340664
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 182
  episodes_total: 144266
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.073201417779348
          entropy_coeff: 0.01
          kl: 0.014769541676278128
          policy_loss: -0.0728211385804954
          total_loss: 0.06066310478619056
          vf_explained_var: 0.9329639077186584
          vf_loss: 0.12056939398774495
    num_agent_steps_sampled: 7466922
    num_agent_steps_trained: 7466922
    num_steps_sampled: 7466922
    num_steps_trained: 74669

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,807,418132,7466922,4.71934,17.54,-0.52,54.8022


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7476918
  custom_metrics: {}
  date: 2021-11-23_15-06-28
  done: false
  episode_len_mean: 55.78212290502793
  episode_media: {}
  episode_reward_max: 13.560000000000008
  episode_reward_mean: 4.875307262569836
  episode_reward_min: -0.5500000000000003
  episodes_this_iter: 179
  episodes_total: 144445
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.050394421648309
          entropy_coeff: 0.01
          kl: 0.014072566993122837
          policy_loss: -0.07583576357012288
          total_loss: 0.052212385033907405
          vf_explained_var: 0.944295346736908
          vf_loss: 0.11649302580042267
    num_agent_steps_sampled: 7476918
    num_agent_steps_trained: 7476918
    num_steps_sampled: 7476918
    num_steps_trained: 747

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,808,418660,7476918,4.87531,13.56,-0.55,55.7821




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7486914
  custom_metrics: {}
  date: 2021-11-23_15-15-31
  done: false
  episode_len_mean: 55.137362637362635
  episode_media: {}
  episode_reward_max: 15.43000000000001
  episode_reward_mean: 5.662857142857148
  episode_reward_min: -0.6300000000000003
  episodes_this_iter: 182
  episodes_total: 144627
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0286678192605936
          entropy_coeff: 0.01
          kl: 0.015464479157586353
          policy_loss: -0.06696165370262441
          total_loss: 0.09948939670838769
          vf_explained_var: 0.9347974061965942
          vf_loss: 0.15150771083003545
    num_agent_steps_sampled: 7486914
    num_agent_steps_trained: 7486914
    num_steps_sampled: 7486914
    num_steps_trained: 74

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,809,419203,7486914,5.66286,15.43,-0.63,55.1374




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7496910
  custom_metrics: {}
  date: 2021-11-23_15-24-35
  done: false
  episode_len_mean: 54.29891304347826
  episode_media: {}
  episode_reward_max: 19.60999999999998
  episode_reward_mean: 5.580706521739135
  episode_reward_min: -0.6200000000000003
  episodes_this_iter: 184
  episodes_total: 144811
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0433687905949283
          entropy_coeff: 0.01
          kl: 0.014457898514649868
          policy_loss: -0.06878819993639425
          total_loss: 0.08887586473736292
          vf_explained_var: 0.9456122517585754
          vf_loss: 0.14516085115921532
    num_agent_steps_sampled: 7496910
    num_agent_steps_trained: 7496910
    num_steps_sampled: 7496910
    num_steps_trained: 749

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,810,419747,7496910,5.58071,19.61,-0.62,54.2989




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7506906
  custom_metrics: {}
  date: 2021-11-23_15-33-53
  done: false
  episode_len_mean: 54.18478260869565
  episode_media: {}
  episode_reward_max: 15.510000000000009
  episode_reward_mean: 5.498804347826092
  episode_reward_min: -0.6600000000000004
  episodes_this_iter: 184
  episodes_total: 144995
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0601319915558918
          entropy_coeff: 0.01
          kl: 0.015548713574275504
          policy_loss: -0.07077244627908369
          total_loss: 0.0968094612982479
          vf_explained_var: 0.9455729126930237
          vf_loss: 0.15276131249223668
    num_agent_steps_sampled: 7506906
    num_agent_steps_trained: 7506906
    num_steps_sampled: 7506906
    num_steps_trained: 750

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,811,420305,7506906,5.4988,15.51,-0.66,54.1848




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7516902
  custom_metrics: {}
  date: 2021-11-23_15-43-03
  done: false
  episode_len_mean: 53.45989304812834
  episode_media: {}
  episode_reward_max: 19.43999999999998
  episode_reward_mean: 5.548395721925137
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 187
  episodes_total: 145182
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0666259320864238
          entropy_coeff: 0.01
          kl: 0.015198197039804013
          policy_loss: -0.0677294483282376
          total_loss: 0.09488539646915203
          vf_explained_var: 0.9542728662490845
          vf_loss: 0.14865771021233895
    num_agent_steps_sampled: 7516902
    num_agent_steps_trained: 7516902
    num_steps_sampled: 7516902
    num_steps_trained: 7516

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,812,420854,7516902,5.5484,19.44,-0.53,53.4599


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7526898
  custom_metrics: {}
  date: 2021-11-23_15-51-56
  done: false
  episode_len_mean: 54.32608695652174
  episode_media: {}
  episode_reward_max: 15.620000000000006
  episode_reward_mean: 5.190326086956526
  episode_reward_min: -0.6000000000000003
  episodes_this_iter: 184
  episodes_total: 145366
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.028573202967165
          entropy_coeff: 0.01
          kl: 0.015136779233691956
          policy_loss: -0.06736965018960864
          total_loss: 0.09291014960773943
          vf_explained_var: 0.9466860890388489
          vf_loss: 0.14608205586197753
    num_agent_steps_sampled: 7526898
    num_agent_steps_trained: 7526898
    num_steps_sampled: 7526898
    num_steps_trained: 752

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,813,421387,7526898,5.19033,15.62,-0.6,54.3261


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7536894
  custom_metrics: {}
  date: 2021-11-23_16-00-50
  done: false
  episode_len_mean: 54.043243243243246
  episode_media: {}
  episode_reward_max: 13.580000000000007
  episode_reward_mean: 5.155081081081086
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 185
  episodes_total: 145551
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0613403080457666
          entropy_coeff: 0.01
          kl: 0.013925739924761423
          policy_loss: -0.07018738339452778
          total_loss: 0.06232750229554508
          vf_explained_var: 0.9438478350639343
          vf_loss: 0.12140371068549084
    num_agent_steps_sampled: 7536894
    num_agent_steps_trained: 7536894
    num_steps_sampled: 7536894
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,814,421921,7536894,5.15508,13.58,-0.5,54.0432




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7546890
  custom_metrics: {}
  date: 2021-11-23_16-10-00
  done: false
  episode_len_mean: 54.25
  episode_media: {}
  episode_reward_max: 13.700000000000005
  episode_reward_mean: 4.8842391304347865
  episode_reward_min: -0.5900000000000003
  episodes_this_iter: 184
  episodes_total: 145735
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1109852392031963
          entropy_coeff: 0.01
          kl: 0.01399752906938831
          policy_loss: -0.0758241477614798
          total_loss: 0.04953318078667514
          vf_explained_var: 0.9457154870033264
          vf_loss: 0.114579059911731
    num_agent_steps_sampled: 7546890
    num_agent_steps_trained: 7546890
    num_steps_sampled: 7546890
    num_steps_trained: 7546890
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,815,422471,7546890,4.88424,13.7,-0.59,54.25




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7556886
  custom_metrics: {}
  date: 2021-11-23_16-19-26
  done: false
  episode_len_mean: 53.994623655913976
  episode_media: {}
  episode_reward_max: 19.519999999999996
  episode_reward_mean: 5.372903225806456
  episode_reward_min: -0.5500000000000003
  episodes_this_iter: 186
  episodes_total: 145921
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0481115410126836
          entropy_coeff: 0.01
          kl: 0.014973528050751332
          policy_loss: -0.06687843307736914
          total_loss: 0.08369957614790818
          vf_explained_var: 0.9320113658905029
          vf_loss: 0.13694755536659312
    num_agent_steps_sampled: 7556886
    num_agent_steps_trained: 7556886
    num_steps_sampled: 7556886
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,816,423038,7556886,5.3729,19.52,-0.55,53.9946


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7566882
  custom_metrics: {}
  date: 2021-11-23_16-28-23
  done: false
  episode_len_mean: 53.60215053763441
  episode_media: {}
  episode_reward_max: 15.630000000000006
  episode_reward_mean: 5.007903225806455
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 186
  episodes_total: 146107
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0865845464080213
          entropy_coeff: 0.01
          kl: 0.014802103998199414
          policy_loss: -0.06673907746430284
          total_loss: 0.0853663141476884
          vf_explained_var: 0.9361169338226318
          vf_loss: 0.13925019262169663
    num_agent_steps_sampled: 7566882
    num_agent_steps_trained: 7566882
    num_steps_sampled: 7566882
    num_steps_trained: 756

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,817,423574,7566882,5.0079,15.63,-0.52,53.6022




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7576878
  custom_metrics: {}
  date: 2021-11-23_16-37-45
  done: false
  episode_len_mean: 53.924731182795696
  episode_media: {}
  episode_reward_max: 15.630000000000006
  episode_reward_mean: 5.541989247311832
  episode_reward_min: -0.6100000000000003
  episodes_this_iter: 186
  episodes_total: 146293
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.052023299439365
          entropy_coeff: 0.01
          kl: 0.014358573563759969
          policy_loss: -0.06618365444642314
          total_loss: 0.07213644026674466
          vf_explained_var: 0.9530710577964783
          vf_loss: 0.12612970113580352
    num_agent_steps_sampled: 7576878
    num_agent_steps_trained: 7576878
    num_steps_sampled: 7576878
    num_steps_trained: 75

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,818,424136,7576878,5.54199,15.63,-0.61,53.9247




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7586874
  custom_metrics: {}
  date: 2021-11-23_16-47-02
  done: false
  episode_len_mean: 52.8563829787234
  episode_media: {}
  episode_reward_max: 15.620000000000006
  episode_reward_mean: 5.3811170212766
  episode_reward_min: -0.5600000000000003
  episodes_this_iter: 188
  episodes_total: 146481
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0592285951217972
          entropy_coeff: 0.01
          kl: 0.014649875330415216
          policy_loss: -0.06994807634189061
          total_loss: 0.08724725504927708
          vf_explained_var: 0.9456368684768677
          vf_loss: 0.14441336816677966
    num_agent_steps_sampled: 7586874
    num_agent_steps_trained: 7586874
    num_steps_sampled: 7586874
    num_steps_trained: 75868

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,819,424693,7586874,5.38112,15.62,-0.56,52.8564


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7596870
  custom_metrics: {}
  date: 2021-11-23_16-55-57
  done: false
  episode_len_mean: 54.60109289617486
  episode_media: {}
  episode_reward_max: 15.600000000000007
  episode_reward_mean: 5.3600546448087485
  episode_reward_min: -0.5500000000000003
  episodes_this_iter: 183
  episodes_total: 146664
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.046899280682146
          entropy_coeff: 0.01
          kl: 0.01444949256048097
          policy_loss: -0.07208169542832932
          total_loss: 0.06767234320232018
          vf_explained_var: 0.9498216509819031
          vf_loss: 0.127305280768507
    num_agent_steps_sampled: 7596870
    num_agent_steps_trained: 7596870
    num_steps_sampled: 7596870
    num_steps_trained: 75968

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,820,425228,7596870,5.36005,15.6,-0.55,54.6011


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7606866
  custom_metrics: {}
  date: 2021-11-23_17-04-48
  done: false
  episode_len_mean: 54.91256830601093
  episode_media: {}
  episode_reward_max: 13.590000000000007
  episode_reward_mean: 5.456284153005468
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 183
  episodes_total: 146847
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0592621528240573
          entropy_coeff: 0.01
          kl: 0.015045035610641485
          policy_loss: -0.06960536205188292
          total_loss: 0.08170252854654181
          vf_explained_var: 0.9385778307914734
          vf_loss: 0.13762603889670164
    num_agent_steps_sampled: 7606866
    num_agent_steps_trained: 7606866
    num_steps_sampled: 7606866
    num_steps_trained: 76

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,821,425759,7606866,5.45628,13.59,-0.5,54.9126




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7616862
  custom_metrics: {}
  date: 2021-11-23_17-13-55
  done: false
  episode_len_mean: 54.167567567567566
  episode_media: {}
  episode_reward_max: 13.640000000000006
  episode_reward_mean: 5.368324324324329
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 185
  episodes_total: 147032
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.080282715549431
          entropy_coeff: 0.01
          kl: 0.014818771884485932
          policy_loss: -0.06689284198782332
          total_loss: 0.07714381529888242
          vf_explained_var: 0.9436091780662537
          vf_loss: 0.13108046899744155
    num_agent_steps_sampled: 7616862
    num_agent_steps_trained: 7616862
    num_steps_sampled: 7616862
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,822,426306,7616862,5.36832,13.64,-0.49,54.1676




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7626858
  custom_metrics: {}
  date: 2021-11-23_17-23-14
  done: false
  episode_len_mean: 53.0855614973262
  episode_media: {}
  episode_reward_max: 15.550000000000008
  episode_reward_mean: 5.0980748663101645
  episode_reward_min: -0.5600000000000003
  episodes_this_iter: 187
  episodes_total: 147219
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0563204154192682
          entropy_coeff: 0.01
          kl: 0.013947066277852659
          policy_loss: -0.07499870933040574
          total_loss: 0.05515514624309569
          vf_explained_var: 0.9457065463066101
          vf_loss: 0.11894389867182283
    num_agent_steps_sampled: 7626858
    num_agent_steps_trained: 7626858
    num_steps_sampled: 7626858
    num_steps_trained: 76

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,823,426865,7626858,5.09807,15.55,-0.56,53.0856


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7636854
  custom_metrics: {}
  date: 2021-11-23_17-32-06
  done: false
  episode_len_mean: 54.25945945945946
  episode_media: {}
  episode_reward_max: 15.560000000000008
  episode_reward_mean: 5.495513513513518
  episode_reward_min: -0.5600000000000003
  episodes_this_iter: 185
  episodes_total: 147404
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0510766255807686
          entropy_coeff: 0.01
          kl: 0.015434586752631463
          policy_loss: -0.06683328841297762
          total_loss: 0.09601343927211585
          vf_explained_var: 0.944409191608429
          vf_loss: 0.14819557437725575
    num_agent_steps_sampled: 7636854
    num_agent_steps_trained: 7636854
    num_steps_sampled: 7636854
    num_steps_trained: 763

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,824,427397,7636854,5.49551,15.56,-0.56,54.2595




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7646850
  custom_metrics: {}
  date: 2021-11-23_17-41-13
  done: false
  episode_len_mean: 53.3048128342246
  episode_media: {}
  episode_reward_max: 15.580000000000007
  episode_reward_mean: 5.041657754010699
  episode_reward_min: -0.5900000000000003
  episodes_this_iter: 187
  episodes_total: 147591
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.091117333982843
          entropy_coeff: 0.01
          kl: 0.014087306269878863
          policy_loss: -0.0676450557120923
          total_loss: 0.07430678356539862
          vf_explained_var: 0.9358588457107544
          vf_loss: 0.13077036795621536
    num_agent_steps_sampled: 7646850
    num_agent_steps_trained: 7646850
    num_steps_sampled: 7646850
    num_steps_trained: 76468

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,825,427944,7646850,5.04166,15.58,-0.59,53.3048




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7656846
  custom_metrics: {}
  date: 2021-11-23_17-50-38
  done: false
  episode_len_mean: 53.96756756756757
  episode_media: {}
  episode_reward_max: 13.680000000000005
  episode_reward_mean: 5.453189189189194
  episode_reward_min: -0.6100000000000003
  episodes_this_iter: 185
  episodes_total: 147776
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0751068845092053
          entropy_coeff: 0.01
          kl: 0.014548158956322606
          policy_loss: -0.06866039303788284
          total_loss: 0.07154159007791551
          vf_explained_var: 0.9432525038719177
          vf_loss: 0.1278105261211048
    num_agent_steps_sampled: 7656846
    num_agent_steps_trained: 7656846
    num_steps_sampled: 7656846
    num_steps_trained: 765

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,826,428509,7656846,5.45319,13.68,-0.61,53.9676




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7666842
  custom_metrics: {}
  date: 2021-11-23_17-59-44
  done: false
  episode_len_mean: 53.946236559139784
  episode_media: {}
  episode_reward_max: 13.660000000000005
  episode_reward_mean: 4.799301075268821
  episode_reward_min: -0.5600000000000004
  episodes_this_iter: 186
  episodes_total: 147962
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.101910403622202
          entropy_coeff: 0.01
          kl: 0.014606777559661452
          policy_loss: -0.07158009817378379
          total_loss: 0.0600928793469477
          vf_explained_var: 0.9245572090148926
          vf_loss: 0.11941601621720146
    num_agent_steps_sampled: 7666842
    num_agent_steps_trained: 7666842
    num_steps_sampled: 7666842
    num_steps_trained: 766

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,827,429055,7666842,4.7993,13.66,-0.56,53.9462


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7676838
  custom_metrics: {}
  date: 2021-11-23_18-08-37
  done: false
  episode_len_mean: 54.21739130434783
  episode_media: {}
  episode_reward_max: 15.560000000000008
  episode_reward_mean: 4.911413043478265
  episode_reward_min: -0.5700000000000003
  episodes_this_iter: 184
  episodes_total: 148146
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.072638106322193
          entropy_coeff: 0.01
          kl: 0.014223256941696289
          policy_loss: -0.07061623206983828
          total_loss: 0.06624695950413444
          vf_explained_var: 0.9295280575752258
          vf_loss: 0.12518721601362884
    num_agent_steps_sampled: 7676838
    num_agent_steps_trained: 7676838
    num_steps_sampled: 7676838
    num_steps_trained: 767

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,828,429588,7676838,4.91141,15.56,-0.57,54.2174


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7686834
  custom_metrics: {}
  date: 2021-11-23_18-17-31
  done: false
  episode_len_mean: 53.74193548387097
  episode_media: {}
  episode_reward_max: 15.640000000000006
  episode_reward_mean: 5.40634408602151
  episode_reward_min: -0.7500000000000004
  episodes_this_iter: 186
  episodes_total: 148332
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0595484375714297
          entropy_coeff: 0.01
          kl: 0.01424598582773796
          policy_loss: -0.06815737721519115
          total_loss: 0.07941245777444701
          vf_explained_var: 0.9398280382156372
          vf_loss: 0.13571118152827533
    num_agent_steps_sampled: 7686834
    num_agent_steps_trained: 7686834
    num_steps_sampled: 7686834
    num_steps_trained: 7686

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,829,430122,7686834,5.40634,15.64,-0.75,53.7419




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7696830
  custom_metrics: {}
  date: 2021-11-23_18-26-41
  done: false
  episode_len_mean: 53.00529100529101
  episode_media: {}
  episode_reward_max: 15.560000000000008
  episode_reward_mean: 5.097619047619053
  episode_reward_min: -0.7300000000000004
  episodes_this_iter: 189
  episodes_total: 148521
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.068491136261737
          entropy_coeff: 0.01
          kl: 0.014535707388842713
          policy_loss: -0.06459202681623673
          total_loss: 0.09519159345517579
          vf_explained_var: 0.9345006942749023
          vf_loss: 0.14735437236000676
    num_agent_steps_sampled: 7696830
    num_agent_steps_trained: 7696830
    num_steps_sampled: 7696830
    num_steps_trained: 769

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,830,430672,7696830,5.09762,15.56,-0.73,53.0053




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7706826
  custom_metrics: {}
  date: 2021-11-23_18-36-05
  done: false
  episode_len_mean: 53.56989247311828
  episode_media: {}
  episode_reward_max: 15.600000000000005
  episode_reward_mean: 5.424731182795703
  episode_reward_min: -0.5600000000000003
  episodes_this_iter: 186
  episodes_total: 148707
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.075827639792339
          entropy_coeff: 0.01
          kl: 0.014533619746199843
          policy_loss: -0.07211176947060899
          total_loss: 0.07427299603193122
          vf_explained_var: 0.9523161053657532
          vf_loss: 0.13403363851979974
    num_agent_steps_sampled: 7706826
    num_agent_steps_trained: 7706826
    num_steps_sampled: 7706826
    num_steps_trained: 770

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,831,431235,7706826,5.42473,15.6,-0.56,53.5699


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7716822
  custom_metrics: {}
  date: 2021-11-23_18-44-57
  done: false
  episode_len_mean: 54.23783783783784
  episode_media: {}
  episode_reward_max: 15.570000000000007
  episode_reward_mean: 5.247729729729734
  episode_reward_min: -0.4300000000000002
  episodes_this_iter: 185
  episodes_total: 148892
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0454674940511404
          entropy_coeff: 0.01
          kl: 0.014578104815078268
          policy_loss: -0.07003658812448933
          total_loss: 0.0635166102975862
          vf_explained_var: 0.9570499658584595
          vf_loss: 0.12079712765209048
    num_agent_steps_sampled: 7716822
    num_agent_steps_trained: 7716822
    num_steps_sampled: 7716822
    num_steps_trained: 771

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,832,431767,7716822,5.24773,15.57,-0.43,54.2378


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7726818
  custom_metrics: {}
  date: 2021-11-23_18-53-53
  done: false
  episode_len_mean: 53.68817204301075
  episode_media: {}
  episode_reward_max: 13.630000000000006
  episode_reward_mean: 4.869516129032262
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 186
  episodes_total: 149078
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0936361703288604
          entropy_coeff: 0.01
          kl: 0.013795658800681463
          policy_loss: -0.07333715167706725
          total_loss: 0.05617226800019841
          vf_explained_var: 0.9453451633453369
          vf_loss: 0.11901754498393854
    num_agent_steps_sampled: 7726818
    num_agent_steps_trained: 7726818
    num_steps_sampled: 7726818
    num_steps_trained: 77

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,833,432303,7726818,4.86952,13.63,-0.53,53.6882




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7736814
  custom_metrics: {}
  date: 2021-11-23_19-03-15
  done: false
  episode_len_mean: 54.043243243243246
  episode_media: {}
  episode_reward_max: 15.570000000000007
  episode_reward_mean: 5.108810810810814
  episode_reward_min: -0.5900000000000003
  episodes_this_iter: 185
  episodes_total: 149263
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.06768493049116
          entropy_coeff: 0.01
          kl: 0.015594401655799704
          policy_loss: -0.0670823057960574
          total_loss: 0.09364193712269689
          vf_explained_var: 0.9387974739074707
          vf_loss: 0.14587509473608096
    num_agent_steps_sampled: 7736814
    num_agent_steps_trained: 7736814
    num_steps_sampled: 7736814
    num_steps_trained: 7736

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,834,432866,7736814,5.10881,15.57,-0.59,54.0432




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7746810
  custom_metrics: {}
  date: 2021-11-23_19-12-20
  done: false
  episode_len_mean: 54.23913043478261
  episode_media: {}
  episode_reward_max: 15.670000000000005
  episode_reward_mean: 5.369184782608699
  episode_reward_min: -0.6300000000000003
  episodes_this_iter: 184
  episodes_total: 149447
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0712782955313305
          entropy_coeff: 0.01
          kl: 0.015868586473539364
          policy_loss: -0.06831365432667133
          total_loss: 0.08460998399512676
          vf_explained_var: 0.9411525130271912
          vf_loss: 0.13748579657548687
    num_agent_steps_sampled: 7746810
    num_agent_steps_trained: 7746810
    num_steps_sampled: 7746810
    num_steps_trained: 77

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,835,433411,7746810,5.36918,15.67,-0.63,54.2391


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7756806
  custom_metrics: {}
  date: 2021-11-23_19-21-15
  done: false
  episode_len_mean: 54.167567567567566
  episode_media: {}
  episode_reward_max: 13.580000000000007
  episode_reward_mean: 5.238594594594598
  episode_reward_min: -0.6800000000000004
  episodes_this_iter: 185
  episodes_total: 149632
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0879683634842254
          entropy_coeff: 0.01
          kl: 0.014662693656729668
          policy_loss: -0.07366034545206546
          total_loss: 0.07313780733132602
          vf_explained_var: 0.9415249824523926
          vf_loss: 0.13427438709228467
    num_agent_steps_sampled: 7756806
    num_agent_steps_trained: 7756806
    num_steps_sampled: 7756806
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,836,433945,7756806,5.23859,13.58,-0.68,54.1676




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7766802
  custom_metrics: {}
  date: 2021-11-23_19-30-25
  done: false
  episode_len_mean: 53.080213903743314
  episode_media: {}
  episode_reward_max: 17.550000000000008
  episode_reward_mean: 5.336577540106957
  episode_reward_min: -0.5500000000000003
  episodes_this_iter: 187
  episodes_total: 149819
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.065494509035325
          entropy_coeff: 0.01
          kl: 0.015016550291203265
          policy_loss: -0.07039467347450085
          total_loss: 0.09558866022757921
          vf_explained_var: 0.9276180863380432
          vf_loss: 0.1524286982733345
    num_agent_steps_sampled: 7766802
    num_agent_steps_trained: 7766802
    num_steps_sampled: 7766802
    num_steps_trained: 776

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,837,434495,7766802,5.33658,17.55,-0.55,53.0802




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7776798
  custom_metrics: {}
  date: 2021-11-23_19-39-37
  done: false
  episode_len_mean: 53.43617021276596
  episode_media: {}
  episode_reward_max: 15.630000000000006
  episode_reward_mean: 5.29329787234043
  episode_reward_min: -0.6000000000000003
  episodes_this_iter: 188
  episodes_total: 150007
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0831099410133667
          entropy_coeff: 0.01
          kl: 0.015270570313367274
          policy_loss: -0.066928229290614
          total_loss: 0.09651111101567554
          vf_explained_var: 0.9449660181999207
          vf_loss: 0.14948217193483856
    num_agent_steps_sampled: 7776798
    num_agent_steps_trained: 7776798
    num_steps_sampled: 7776798
    num_steps_trained: 77767

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,838,435047,7776798,5.2933,15.63,-0.6,53.4362




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7786794
  custom_metrics: {}
  date: 2021-11-23_19-49-11
  done: false
  episode_len_mean: 53.340425531914896
  episode_media: {}
  episode_reward_max: 17.569999999999997
  episode_reward_mean: 5.572872340425536
  episode_reward_min: -0.5900000000000003
  episodes_this_iter: 188
  episodes_total: 150195
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.062883195747812
          entropy_coeff: 0.01
          kl: 0.015668611055578506
          policy_loss: -0.07386077433915211
          total_loss: 0.09355349412231063
          vf_explained_var: 0.9454317092895508
          vf_loss: 0.1523480455135837
    num_agent_steps_sampled: 7786794
    num_agent_steps_trained: 7786794
    num_steps_sampled: 7786794
    num_steps_trained: 778

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,839,435621,7786794,5.57287,17.57,-0.59,53.3404


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7796790
  custom_metrics: {}
  date: 2021-11-23_19-58-08
  done: false
  episode_len_mean: 54.91160220994475
  episode_media: {}
  episode_reward_max: 17.570000000000004
  episode_reward_mean: 4.865690607734811
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 181
  episodes_total: 150376
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.066285946761748
          entropy_coeff: 0.01
          kl: 0.014776069085701985
          policy_loss: -0.07102262627592341
          total_loss: 0.08117671239410244
          vf_explained_var: 0.9407370686531067
          vf_loss: 0.13920046424083637
    num_agent_steps_sampled: 7796790
    num_agent_steps_trained: 7796790
    num_steps_sampled: 7796790
    num_steps_trained: 779

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,840,436159,7796790,4.86569,17.57,-0.51,54.9116




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7806786
  custom_metrics: {}
  date: 2021-11-23_20-08-10
  done: false
  episode_len_mean: 54.31351351351351
  episode_media: {}
  episode_reward_max: 17.509999999999998
  episode_reward_mean: 4.617351351351355
  episode_reward_min: -0.4400000000000002
  episodes_this_iter: 185
  episodes_total: 150561
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0922758231321015
          entropy_coeff: 0.01
          kl: 0.014746779635746334
          policy_loss: -0.07117097724586349
          total_loss: 0.0859650093987431
          vf_explained_var: 0.9282247424125671
          vf_loss: 0.144463735998769
    num_agent_steps_sampled: 7806786
    num_agent_steps_trained: 7806786
    num_steps_sampled: 7806786
    num_steps_trained: 78067

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,841,436760,7806786,4.61735,17.51,-0.44,54.3135




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7816782
  custom_metrics: {}
  date: 2021-11-23_20-17-14
  done: false
  episode_len_mean: 54.404371584699454
  episode_media: {}
  episode_reward_max: 15.560000000000008
  episode_reward_mean: 5.254371584699458
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 183
  episodes_total: 150744
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0493860646184667
          entropy_coeff: 0.01
          kl: 0.014719683450093385
          policy_loss: -0.07187891734802435
          total_loss: 0.07451179881176519
          vf_explained_var: 0.9286801218986511
          vf_loss: 0.13335129694319348
    num_agent_steps_sampled: 7816782
    num_agent_steps_trained: 7816782
    num_steps_sampled: 7816782
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,842,437304,7816782,5.25437,15.56,-0.54,54.4044




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7826778
  custom_metrics: {}
  date: 2021-11-23_20-26-25
  done: false
  episode_len_mean: 53.27807486631016
  episode_media: {}
  episode_reward_max: 15.580000000000002
  episode_reward_mean: 5.379197860962571
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 187
  episodes_total: 150931
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.054016329988418
          entropy_coeff: 0.01
          kl: 0.014867781124119996
          policy_loss: -0.06584654832853755
          total_loss: 0.09209709991490266
          vf_explained_var: 0.9465339779853821
          vf_loss: 0.14461314582011575
    num_agent_steps_sampled: 7826778
    num_agent_steps_trained: 7826778
    num_steps_sampled: 7826778
    num_steps_trained: 782

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,843,437855,7826778,5.3792,15.58,-0.51,53.2781


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7836774
  custom_metrics: {}
  date: 2021-11-23_20-35-19
  done: false
  episode_len_mean: 53.903743315508024
  episode_media: {}
  episode_reward_max: 17.58
  episode_reward_mean: 5.395347593582892
  episode_reward_min: -0.47000000000000025
  episodes_this_iter: 187
  episodes_total: 151118
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0703848834736758
          entropy_coeff: 0.01
          kl: 0.015014887176576137
          policy_loss: -0.07026465868913516
          total_loss: 0.08359880914906398
          vf_explained_var: 0.9473307132720947
          vf_loss: 0.14036152603751775
    num_agent_steps_sampled: 7836774
    num_agent_steps_trained: 7836774
    num_steps_sampled: 7836774
    num_steps_trained: 7836774
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,844,438389,7836774,5.39535,17.58,-0.47,53.9037




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7846770
  custom_metrics: {}
  date: 2021-11-23_20-44-57
  done: false
  episode_len_mean: 52.26842105263158
  episode_media: {}
  episode_reward_max: 15.530000000000008
  episode_reward_mean: 5.138842105263163
  episode_reward_min: -0.5500000000000003
  episodes_this_iter: 190
  episodes_total: 151308
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0878745623621118
          entropy_coeff: 0.01
          kl: 0.015160991704078194
          policy_loss: -0.06349875178755202
          total_loss: 0.086945379453844
          vf_explained_var: 0.9387155771255493
          vf_loss: 0.13678424137889456
    num_agent_steps_sampled: 7846770
    num_agent_steps_trained: 7846770
    num_steps_sampled: 7846770
    num_steps_trained: 7846

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,845,438967,7846770,5.13884,15.53,-0.55,52.2684


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7856766
  custom_metrics: {}
  date: 2021-11-23_20-53-54
  done: false
  episode_len_mean: 53.22222222222222
  episode_media: {}
  episode_reward_max: 17.539999999999978
  episode_reward_mean: 4.947407407407411
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 189
  episodes_total: 151497
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.095429148922962
          entropy_coeff: 0.01
          kl: 0.015198612655670263
          policy_loss: -0.06301753052766688
          total_loss: 0.08743423465241366
          vf_explained_var: 0.9442121982574463
          vf_loss: 0.1367817155204257
    num_agent_steps_sampled: 7856766
    num_agent_steps_trained: 7856766
    num_steps_sampled: 7856766
    num_steps_trained: 7856

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,846,439504,7856766,4.94741,17.54,-0.53,53.2222




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7866762
  custom_metrics: {}
  date: 2021-11-23_21-03-15
  done: false
  episode_len_mean: 52.51052631578948
  episode_media: {}
  episode_reward_max: 13.610000000000005
  episode_reward_mean: 5.114368421052636
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 190
  episodes_total: 151687
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.07541342844446
          entropy_coeff: 0.01
          kl: 0.01495425664962993
          policy_loss: -0.07105323268355365
          total_loss: 0.07717967831945079
          vf_explained_var: 0.9466547966003418
          vf_loss: 0.13491937720018576
    num_agent_steps_sampled: 7866762
    num_agent_steps_trained: 7866762
    num_steps_sampled: 7866762
    num_steps_trained: 78667

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,847,440065,7866762,5.11437,13.61,-0.54,52.5105


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7876758
  custom_metrics: {}
  date: 2021-11-23_21-12-11
  done: false
  episode_len_mean: 52.6931216931217
  episode_media: {}
  episode_reward_max: 15.600000000000007
  episode_reward_mean: 5.123386243386248
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 189
  episodes_total: 151876
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0742627429196157
          entropy_coeff: 0.01
          kl: 0.014132212639985596
          policy_loss: -0.06667989372773925
          total_loss: 0.07668696957705935
          vf_explained_var: 0.9482100009918213
          vf_loss: 0.13191454231853122
    num_agent_steps_sampled: 7876758
    num_agent_steps_trained: 7876758
    num_steps_sampled: 7876758
    num_steps_trained: 787

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,848,440601,7876758,5.12339,15.6,-0.54,52.6931




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7886754
  custom_metrics: {}
  date: 2021-11-23_21-21-19
  done: false
  episode_len_mean: 51.89119170984456
  episode_media: {}
  episode_reward_max: 15.640000000000006
  episode_reward_mean: 5.066683937823838
  episode_reward_min: -0.5800000000000003
  episodes_this_iter: 193
  episodes_total: 152069
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0879877735093895
          entropy_coeff: 0.01
          kl: 0.014457785805179368
          policy_loss: -0.06669777504917494
          total_loss: 0.07921938328339068
          vf_explained_var: 0.9423116445541382
          vf_loss: 0.13386039204622457
    num_agent_steps_sampled: 7886754
    num_agent_steps_trained: 7886754
    num_steps_sampled: 7886754
    num_steps_trained: 78

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,849,441149,7886754,5.06668,15.64,-0.58,51.8912




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7896750
  custom_metrics: {}
  date: 2021-11-23_21-30-29
  done: false
  episode_len_mean: 52.66315789473684
  episode_media: {}
  episode_reward_max: 13.690000000000005
  episode_reward_mean: 5.280052631578951
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 190
  episodes_total: 152259
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.082419834294951
          entropy_coeff: 0.01
          kl: 0.015503735324770599
          policy_loss: -0.07136889397853999
          total_loss: 0.08855109517194305
          vf_explained_var: 0.9452254176139832
          vf_loss: 0.14542473924335048
    num_agent_steps_sampled: 7896750
    num_agent_steps_trained: 7896750
    num_steps_sampled: 7896750
    num_steps_trained: 789

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,850,441699,7896750,5.28005,13.69,-0.52,52.6632




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7906746
  custom_metrics: {}
  date: 2021-11-23_21-39-36
  done: false
  episode_len_mean: 52.067708333333336
  episode_media: {}
  episode_reward_max: 15.600000000000007
  episode_reward_mean: 4.7066666666666706
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 192
  episodes_total: 152451
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0909976287539225
          entropy_coeff: 0.01
          kl: 0.014995778455640207
          policy_loss: -0.06792645984019895
          total_loss: 0.0816994704699232
          vf_explained_var: 0.9531298875808716
          vf_loss: 0.13637364824586753
    num_agent_steps_sampled: 7906746
    num_agent_steps_trained: 7906746
    num_steps_sampled: 7906746
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,851,442246,7906746,4.70667,15.6,-0.52,52.0677


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7916742
  custom_metrics: {}
  date: 2021-11-23_21-48-30
  done: false
  episode_len_mean: 52.47894736842105
  episode_media: {}
  episode_reward_max: 13.580000000000007
  episode_reward_mean: 4.9035263157894775
  episode_reward_min: -0.6600000000000004
  episodes_this_iter: 190
  episodes_total: 152641
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0971300648876943
          entropy_coeff: 0.01
          kl: 0.01479689260042725
          policy_loss: -0.06782238652635002
          total_loss: 0.0746152890755549
          vf_explained_var: 0.9360598921775818
          vf_loss: 0.12969980440411089
    num_agent_steps_sampled: 7916742
    num_agent_steps_trained: 7916742
    num_steps_sampled: 7916742
    num_steps_trained: 791

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,852,442780,7916742,4.90353,13.58,-0.66,52.4789




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7926738
  custom_metrics: {}
  date: 2021-11-23_21-57-38
  done: false
  episode_len_mean: 52.223958333333336
  episode_media: {}
  episode_reward_max: 15.620000000000006
  episode_reward_mean: 4.59182291666667
  episode_reward_min: -0.6300000000000003
  episodes_this_iter: 192
  episodes_total: 152833
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.090734629362941
          entropy_coeff: 0.01
          kl: 0.013988099625896741
          policy_loss: -0.07337858491095592
          total_loss: 0.05777370823045466
          vf_explained_var: 0.9237919449806213
          vf_loss: 0.12019299818071945
    num_agent_steps_sampled: 7926738
    num_agent_steps_trained: 7926738
    num_steps_sampled: 7926738
    num_steps_trained: 792

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,853,443328,7926738,4.59182,15.62,-0.63,52.224




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7936734
  custom_metrics: {}
  date: 2021-11-23_22-06-46
  done: false
  episode_len_mean: 53.27127659574468
  episode_media: {}
  episode_reward_max: 15.43000000000001
  episode_reward_mean: 4.994414893617025
  episode_reward_min: -0.4800000000000002
  episodes_this_iter: 188
  episodes_total: 153021
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1017739452032678
          entropy_coeff: 0.01
          kl: 0.014648987526218906
          policy_loss: -0.06815175480502318
          total_loss: 0.07653080702127106
          vf_explained_var: 0.9480049014091492
          vf_loss: 0.13232807543644884
    num_agent_steps_sampled: 7936734
    num_agent_steps_trained: 7936734
    num_steps_sampled: 7936734
    num_steps_trained: 793

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,854,443876,7936734,4.99441,15.43,-0.48,53.2713




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7946730
  custom_metrics: {}
  date: 2021-11-23_22-15-52
  done: false
  episode_len_mean: 52.47089947089947
  episode_media: {}
  episode_reward_max: 17.609999999999985
  episode_reward_mean: 5.029047619047622
  episode_reward_min: -0.5900000000000003
  episodes_this_iter: 189
  episodes_total: 153210
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.06863162993906
          entropy_coeff: 0.01
          kl: 0.014950066102814285
          policy_loss: -0.06686041937965313
          total_loss: 0.08880576205902839
          vf_explained_var: 0.929641604423523
          vf_loss: 0.14229437635657957
    num_agent_steps_sampled: 7946730
    num_agent_steps_trained: 7946730
    num_steps_sampled: 7946730
    num_steps_trained: 79467

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,855,444422,7946730,5.02905,17.61,-0.59,52.4709




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7956726
  custom_metrics: {}
  date: 2021-11-23_22-25-27
  done: false
  episode_len_mean: 53.38297872340426
  episode_media: {}
  episode_reward_max: 15.48000000000001
  episode_reward_mean: 5.19521276595745
  episode_reward_min: -0.7000000000000004
  episodes_this_iter: 188
  episodes_total: 153398
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.055473898835929
          entropy_coeff: 0.01
          kl: 0.014704543245456234
          policy_loss: -0.06305204214252272
          total_loss: 0.08883677110477795
          vf_explained_var: 0.9384654760360718
          vf_loss: 0.138944763336873
    num_agent_steps_sampled: 7956726
    num_agent_steps_trained: 7956726
    num_steps_sampled: 7956726
    num_steps_trained: 7956726

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,856,444997,7956726,5.19521,15.48,-0.7,53.383




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7966722
  custom_metrics: {}
  date: 2021-11-23_22-34-39
  done: false
  episode_len_mean: 53.657754010695186
  episode_media: {}
  episode_reward_max: 13.670000000000005
  episode_reward_mean: 5.109304812834228
  episode_reward_min: -0.6000000000000003
  episodes_this_iter: 187
  episodes_total: 153585
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0705362376917797
          entropy_coeff: 0.01
          kl: 0.014700008910812546
          policy_loss: -0.0686084292716537
          total_loss: 0.08710941805927057
          vf_explained_var: 0.9396586418151855
          vf_loss: 0.14293474993775093
    num_agent_steps_sampled: 7966722
    num_agent_steps_trained: 7966722
    num_steps_sampled: 7966722
    num_steps_trained: 79

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,857,445549,7966722,5.1093,13.67,-0.6,53.6578




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7976718
  custom_metrics: {}
  date: 2021-11-23_22-43-44
  done: false
  episode_len_mean: 53.32620320855615
  episode_media: {}
  episode_reward_max: 17.630000000000003
  episode_reward_mean: 5.196844919786101
  episode_reward_min: -0.5900000000000003
  episodes_this_iter: 187
  episodes_total: 153772
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0792717780932843
          entropy_coeff: 0.01
          kl: 0.014525046818812342
          policy_loss: -0.06963440734660976
          total_loss: 0.07986044197089187
          vf_explained_var: 0.9324830770492554
          vf_loss: 0.13719769421970493
    num_agent_steps_sampled: 7976718
    num_agent_steps_trained: 7976718
    num_steps_sampled: 7976718
    num_steps_trained: 79

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,858,446094,7976718,5.19684,17.63,-0.59,53.3262


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7986714
  custom_metrics: {}
  date: 2021-11-23_22-52-37
  done: false
  episode_len_mean: 53.212765957446805
  episode_media: {}
  episode_reward_max: 13.630000000000006
  episode_reward_mean: 4.954095744680855
  episode_reward_min: -0.4300000000000002
  episodes_this_iter: 188
  episodes_total: 153960
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0870203044041093
          entropy_coeff: 0.01
          kl: 0.014693406769926472
          policy_loss: -0.06892060584427653
          total_loss: 0.08658396662043981
          vf_explained_var: 0.9436188340187073
          vf_loss: 0.14290135709623092
    num_agent_steps_sampled: 7986714
    num_agent_steps_trained: 7986714
    num_steps_sampled: 7986714
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,859,446626,7986714,4.9541,13.63,-0.43,53.2128




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 7996710
  custom_metrics: {}
  date: 2021-11-23_23-01-43
  done: false
  episode_len_mean: 53.5
  episode_media: {}
  episode_reward_max: 15.630000000000006
  episode_reward_mean: 5.050215053763445
  episode_reward_min: -0.6000000000000003
  episodes_this_iter: 186
  episodes_total: 154146
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.098132309185932
          entropy_coeff: 0.01
          kl: 0.015003939841758424
          policy_loss: -0.06827722027569745
          total_loss: 0.09774586503265757
          vf_explained_var: 0.9393900036811829
          vf_loss: 0.15282355649667945
    num_agent_steps_sampled: 7996710
    num_agent_steps_trained: 7996710
    num_steps_sampled: 7996710
    num_steps_trained: 7996710
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,860,447173,7996710,5.05022,15.63,-0.6,53.5




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8006706
  custom_metrics: {}
  date: 2021-11-23_23-10-55
  done: false
  episode_len_mean: 54.30810810810811
  episode_media: {}
  episode_reward_max: 15.730000000000004
  episode_reward_mean: 5.282270270270275
  episode_reward_min: -0.5700000000000003
  episodes_this_iter: 185
  episodes_total: 154331
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0929117592463053
          entropy_coeff: 0.01
          kl: 0.015321567072562116
          policy_loss: -0.06639975093889934
          total_loss: 0.07862697461953366
          vf_explained_var: 0.9337877631187439
          vf_loss: 0.13105139782302547
    num_agent_steps_sampled: 8006706
    num_agent_steps_trained: 8006706
    num_steps_sampled: 8006706
    num_steps_trained: 80

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,861,447725,8006706,5.28227,15.73,-0.57,54.3081


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8016702
  custom_metrics: {}
  date: 2021-11-23_23-19-46
  done: false
  episode_len_mean: 54.255434782608695
  episode_media: {}
  episode_reward_max: 15.660000000000005
  episode_reward_mean: 5.488206521739135
  episode_reward_min: -0.4000000000000002
  episodes_this_iter: 184
  episodes_total: 154515
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0795000266837307
          entropy_coeff: 0.01
          kl: 0.014686259032998588
          policy_loss: -0.06503006837829088
          total_loss: 0.09787903040861169
          vf_explained_var: 0.9458919763565063
          vf_loss: 0.1502469640000757
    num_agent_steps_sampled: 8016702
    num_agent_steps_trained: 8016702
    num_steps_sampled: 8016702
    num_steps_trained: 80

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,862,448255,8016702,5.48821,15.66,-0.4,54.2554




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8026698
  custom_metrics: {}
  date: 2021-11-23_23-28-53
  done: false
  episode_len_mean: 53.854838709677416
  episode_media: {}
  episode_reward_max: 17.56999999999998
  episode_reward_mean: 5.073870967741939
  episode_reward_min: -0.6300000000000003
  episodes_this_iter: 186
  episodes_total: 154701
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.075980548686292
          entropy_coeff: 0.01
          kl: 0.015128372159660362
          policy_loss: -0.06778976082205629
          total_loss: 0.08804497036295199
          vf_explained_var: 0.9178504943847656
          vf_loss: 0.14213021334838571
    num_agent_steps_sampled: 8026698
    num_agent_steps_trained: 8026698
    num_steps_sampled: 8026698
    num_steps_trained: 802

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,863,448803,8026698,5.07387,17.57,-0.63,53.8548




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8036694
  custom_metrics: {}
  date: 2021-11-23_23-37-59
  done: false
  episode_len_mean: 53.516129032258064
  episode_media: {}
  episode_reward_max: 11.710000000000003
  episode_reward_mean: 4.731774193548391
  episode_reward_min: -0.7300000000000004
  episodes_this_iter: 186
  episodes_total: 154887
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1141982051263373
          entropy_coeff: 0.01
          kl: 0.01431742204531201
          policy_loss: -0.06942349730011758
          total_loss: 0.06427584173827608
          vf_explained_var: 0.9446861743927002
          vf_loss: 0.12222444347314439
    num_agent_steps_sampled: 8036694
    num_agent_steps_trained: 8036694
    num_steps_sampled: 8036694
    num_steps_trained: 80

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,864,449349,8036694,4.73177,11.71,-0.73,53.5161


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8046690
  custom_metrics: {}
  date: 2021-11-23_23-46-55
  done: false
  episode_len_mean: 52.41361256544503
  episode_media: {}
  episode_reward_max: 17.499999999999996
  episode_reward_mean: 5.151308900523564
  episode_reward_min: -0.5900000000000003
  episodes_this_iter: 191
  episodes_total: 155078
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.060156996733692
          entropy_coeff: 0.01
          kl: 0.016109911914882483
          policy_loss: -0.06315236231302221
          total_loss: 0.10730476020953027
          vf_explained_var: 0.9327023029327393
          vf_loss: 0.15435829992802536
    num_agent_steps_sampled: 8046690
    num_agent_steps_trained: 8046690
    num_steps_sampled: 8046690
    num_steps_trained: 804

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,865,449885,8046690,5.15131,17.5,-0.59,52.4136


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8056686
  custom_metrics: {}
  date: 2021-11-23_23-55-50
  done: false
  episode_len_mean: 52.473684210526315
  episode_media: {}
  episode_reward_max: 15.660000000000004
  episode_reward_mean: 4.908578947368425
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 190
  episodes_total: 155268
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0961031412025055
          entropy_coeff: 0.01
          kl: 0.01512154803604509
          policy_loss: -0.06939380799756391
          total_loss: 0.09634447036439368
          vf_explained_var: 0.9413862228393555
          vf_loss: 0.15225053141286304
    num_agent_steps_sampled: 8056686
    num_agent_steps_trained: 8056686
    num_steps_sampled: 8056686
    num_steps_trained: 80

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,866,450419,8056686,4.90858,15.66,-0.54,52.4737




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8066682
  custom_metrics: {}
  date: 2021-11-24_00-05-22
  done: false
  episode_len_mean: 52.03626943005181
  episode_media: {}
  episode_reward_max: 13.580000000000007
  episode_reward_mean: 4.646683937823838
  episode_reward_min: -0.6900000000000004
  episodes_this_iter: 193
  episodes_total: 155461
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.123744154862132
          entropy_coeff: 0.01
          kl: 0.016268257364970118
          policy_loss: -0.06304521766087126
          total_loss: 0.09828080898813107
          vf_explained_var: 0.9178394079208374
          vf_loss: 0.14550234382143656
    num_agent_steps_sampled: 8066682
    num_agent_steps_trained: 8066682
    num_steps_sampled: 8066682
    num_steps_trained: 806

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,867,450991,8066682,4.64668,13.58,-0.69,52.0363


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8076678
  custom_metrics: {}
  date: 2021-11-24_00-14-18
  done: false
  episode_len_mean: 52.44210526315789
  episode_media: {}
  episode_reward_max: 13.620000000000006
  episode_reward_mean: 4.6238421052631615
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 190
  episodes_total: 155651
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.110077194372813
          entropy_coeff: 0.01
          kl: 0.014610789922296275
          policy_loss: -0.0713559794605923
          total_loss: 0.08194238527683768
          vf_explained_var: 0.9306873083114624
          vf_loss: 0.14111392972186718
    num_agent_steps_sampled: 8076678
    num_agent_steps_trained: 8076678
    num_steps_sampled: 8076678
    num_steps_trained: 807

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,868,451528,8076678,4.62384,13.62,-0.54,52.4421


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8086674
  custom_metrics: {}
  date: 2021-11-24_00-23-18
  done: false
  episode_len_mean: 52.536842105263155
  episode_media: {}
  episode_reward_max: 13.659999999999984
  episode_reward_mean: 4.733000000000004
  episode_reward_min: -0.5900000000000003
  episodes_this_iter: 190
  episodes_total: 155841
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.104055559706975
          entropy_coeff: 0.01
          kl: 0.015246669808301617
          policy_loss: -0.07060308644496513
          total_loss: 0.07749440086975456
          vf_explained_var: 0.9249469637870789
          vf_loss: 0.13440422215162362
    num_agent_steps_sampled: 8086674
    num_agent_steps_trained: 8086674
    num_steps_sampled: 8086674
    num_steps_trained: 80

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,869,452067,8086674,4.733,13.66,-0.59,52.5368




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8096670
  custom_metrics: {}
  date: 2021-11-24_00-32-29
  done: false
  episode_len_mean: 51.90155440414508
  episode_media: {}
  episode_reward_max: 15.590000000000005
  episode_reward_mean: 4.7486010362694335
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 193
  episodes_total: 156034
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0999433750847736
          entropy_coeff: 0.01
          kl: 0.014941883229253687
          policy_loss: -0.07026846298765622
          total_loss: 0.0828026222073156
          vf_explained_var: 0.918011486530304
          vf_loss: 0.14003104066605265
    num_agent_steps_sampled: 8096670
    num_agent_steps_trained: 8096670
    num_steps_sampled: 8096670
    num_steps_trained: 80

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,870,452618,8096670,4.7486,15.59,-0.48,51.9016




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8106666
  custom_metrics: {}
  date: 2021-11-24_00-42-01
  done: false
  episode_len_mean: 51.69430051813472
  episode_media: {}
  episode_reward_max: 13.670000000000005
  episode_reward_mean: 5.131761658031092
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 193
  episodes_total: 156227
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.058373952294928
          entropy_coeff: 0.01
          kl: 0.015590056911581234
          policy_loss: -0.07024596091584853
          total_loss: 0.0937953278395122
          vf_explained_var: 0.9373788237571716
          vf_loss: 0.1491089279353843
    num_agent_steps_sampled: 8106666
    num_agent_steps_trained: 8106666
    num_steps_sampled: 8106666
    num_steps_trained: 81066

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,871,453191,8106666,5.13176,13.67,-0.51,51.6943


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8116662
  custom_metrics: {}
  date: 2021-11-24_00-50-57
  done: false
  episode_len_mean: 52.07253886010363
  episode_media: {}
  episode_reward_max: 15.620000000000006
  episode_reward_mean: 5.340414507772024
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 193
  episodes_total: 156420
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.056190295583273
          entropy_coeff: 0.01
          kl: 0.015292105019618696
          policy_loss: -0.06938772154107024
          total_loss: 0.09429819886230557
          vf_explained_var: 0.9252455830574036
          vf_loss: 0.14941049600506284
    num_agent_steps_sampled: 8116662
    num_agent_steps_trained: 8116662
    num_steps_sampled: 8116662
    num_steps_trained: 811

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,872,453727,8116662,5.34041,15.62,-0.52,52.0725


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8126658
  custom_metrics: {}
  date: 2021-11-24_00-59-52
  done: false
  episode_len_mean: 52.95238095238095
  episode_media: {}
  episode_reward_max: 15.530000000000008
  episode_reward_mean: 5.02560846560847
  episode_reward_min: -0.6400000000000003
  episodes_this_iter: 189
  episodes_total: 156609
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.07340078993016
          entropy_coeff: 0.01
          kl: 0.014811360471784307
          policy_loss: -0.0648528658675056
          total_loss: 0.08913839186519804
          vf_explained_var: 0.9456443190574646
          vf_loss: 0.14098313310974647
    num_agent_steps_sampled: 8126658
    num_agent_steps_trained: 8126658
    num_steps_sampled: 8126658
    num_steps_trained: 812665

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,873,454262,8126658,5.02561,15.53,-0.64,52.9524




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8136654
  custom_metrics: {}
  date: 2021-11-24_01-09-14
  done: false
  episode_len_mean: 52.41269841269841
  episode_media: {}
  episode_reward_max: 17.54
  episode_reward_mean: 4.917989417989421
  episode_reward_min: -0.5600000000000003
  episodes_this_iter: 189
  episodes_total: 156798
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.077288466620158
          entropy_coeff: 0.01
          kl: 0.015353053642615619
          policy_loss: -0.06586254674660608
          total_loss: 0.1026497471994205
          vf_explained_var: 0.9392040967941284
          vf_loss: 0.15430900138170725
    num_agent_steps_sampled: 8136654
    num_agent_steps_trained: 8136654
    num_steps_sampled: 8136654
    num_steps_trained: 8136654
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,874,454823,8136654,4.91799,17.54,-0.56,52.4127




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8146650
  custom_metrics: {}
  date: 2021-11-24_01-18-24
  done: false
  episode_len_mean: 52.177083333333336
  episode_media: {}
  episode_reward_max: 17.609999999999985
  episode_reward_mean: 5.158750000000004
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 192
  episodes_total: 156990
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0595716181050343
          entropy_coeff: 0.01
          kl: 0.01488729168437169
          policy_loss: -0.07127973794344669
          total_loss: 0.07312328774337604
          vf_explained_var: 0.9458100199699402
          vf_loss: 0.13108363038173926
    num_agent_steps_sampled: 8146650
    num_agent_steps_trained: 8146650
    num_steps_sampled: 8146650
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,875,455373,8146650,5.15875,17.61,-0.49,52.1771


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8156646
  custom_metrics: {}
  date: 2021-11-24_01-27-21
  done: false
  episode_len_mean: 52.338541666666664
  episode_media: {}
  episode_reward_max: 13.640000000000006
  episode_reward_mean: 5.063281250000004
  episode_reward_min: -0.6100000000000003
  episodes_this_iter: 192
  episodes_total: 157182
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.082980143305767
          entropy_coeff: 0.01
          kl: 0.015299652552852289
          policy_loss: -0.06384311126890227
          total_loss: 0.09509963758904794
          vf_explained_var: 0.94387286901474
          vf_loss: 0.14491802841166865
    num_agent_steps_sampled: 8156646
    num_agent_steps_trained: 8156646
    num_steps_sampled: 8156646
    num_steps_trained: 8156

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,876,455910,8156646,5.06328,13.64,-0.61,52.3385


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8166642
  custom_metrics: {}
  date: 2021-11-24_01-36-17
  done: false
  episode_len_mean: 53.13903743315508
  episode_media: {}
  episode_reward_max: 15.730000000000004
  episode_reward_mean: 5.210000000000004
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 187
  episodes_total: 157369
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.067967958263604
          entropy_coeff: 0.01
          kl: 0.014178569989368433
          policy_loss: -0.0706228646010236
          total_loss: 0.07188304200384368
          vf_explained_var: 0.9493025541305542
          vf_loss: 0.13088502993883872
    num_agent_steps_sampled: 8166642
    num_agent_steps_trained: 8166642
    num_steps_sampled: 8166642
    num_steps_trained: 8166

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,877,456446,8166642,5.21,15.73,-0.52,53.139




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8176638
  custom_metrics: {}
  date: 2021-11-24_01-45-42
  done: false
  episode_len_mean: 51.7319587628866
  episode_media: {}
  episode_reward_max: 13.570000000000007
  episode_reward_mean: 5.537422680412375
  episode_reward_min: -0.47000000000000025
  episodes_this_iter: 194
  episodes_total: 157563
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0548158567832657
          entropy_coeff: 0.01
          kl: 0.014848624659190374
          policy_loss: -0.07090003009362557
          total_loss: 0.08136274166073604
          vf_explained_var: 0.9421766400337219
          vf_loss: 0.13898390522619417
    num_agent_steps_sampled: 8176638
    num_agent_steps_trained: 8176638
    num_steps_sampled: 8176638
    num_steps_trained: 81

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,878,457011,8176638,5.53742,13.57,-0.47,51.732




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8186634
  custom_metrics: {}
  date: 2021-11-24_01-54-50
  done: false
  episode_len_mean: 52.77777777777778
  episode_media: {}
  episode_reward_max: 15.630000000000006
  episode_reward_mean: 4.977830687830692
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 189
  episodes_total: 157752
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0673165104475366
          entropy_coeff: 0.01
          kl: 0.014396326569508754
          policy_loss: -0.06996152503091008
          total_loss: 0.06736844443647026
          vf_explained_var: 0.9580347537994385
          vf_loss: 0.12520650213604875
    num_agent_steps_sampled: 8186634
    num_agent_steps_trained: 8186634
    num_steps_sampled: 8186634
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,879,457559,8186634,4.97783,15.63,-0.48,52.7778


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8196630
  custom_metrics: {}
  date: 2021-11-24_02-03-44
  done: false
  episode_len_mean: 52.73684210526316
  episode_media: {}
  episode_reward_max: 15.610000000000007
  episode_reward_mean: 5.204789473684214
  episode_reward_min: -0.5800000000000003
  episodes_this_iter: 190
  episodes_total: 157942
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0494489452685696
          entropy_coeff: 0.01
          kl: 0.014923578032135926
          policy_loss: -0.06758226086260337
          total_loss: 0.09101866927780701
          vf_explained_var: 0.9330177307128906
          vf_loss: 0.14509764215310506
    num_agent_steps_sampled: 8196630
    num_agent_steps_trained: 8196630
    num_steps_sampled: 8196630
    num_steps_trained: 81

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,880,458093,8196630,5.20479,15.61,-0.58,52.7368




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8206626
  custom_metrics: {}
  date: 2021-11-24_02-12-50
  done: false
  episode_len_mean: 52.114583333333336
  episode_media: {}
  episode_reward_max: 15.670000000000005
  episode_reward_mean: 5.368125000000004
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 192
  episodes_total: 158134
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0594227497836193
          entropy_coeff: 0.01
          kl: 0.01446893482894981
          policy_loss: -0.07386841964860826
          total_loss: 0.08342088800509573
          vf_explained_var: 0.9436693787574768
          vf_loss: 0.14492149220267123
    num_agent_steps_sampled: 8206626
    num_agent_steps_trained: 8206626
    num_steps_sampled: 8206626
    num_steps_trained: 82

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,881,458639,8206626,5.36813,15.67,-0.53,52.1146




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8216622
  custom_metrics: {}
  date: 2021-11-24_02-22-27
  done: false
  episode_len_mean: 51.4559585492228
  episode_media: {}
  episode_reward_max: 17.569999999999993
  episode_reward_mean: 4.897668393782387
  episode_reward_min: -0.6000000000000003
  episodes_this_iter: 193
  episodes_total: 158327
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0636833903301195
          entropy_coeff: 0.01
          kl: 0.014907284897086355
          policy_loss: -0.06802651559972941
          total_loss: 0.09044989003305967
          vf_explained_var: 0.9496654868125916
          vf_loss: 0.14515257959841305
    num_agent_steps_sampled: 8216622
    num_agent_steps_trained: 8216622
    num_steps_sampled: 8216622
    num_steps_trained: 821

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,882,459216,8216622,4.89767,17.57,-0.6,51.456


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8226618
  custom_metrics: {}
  date: 2021-11-24_02-31-23
  done: false
  episode_len_mean: 51.93782383419689
  episode_media: {}
  episode_reward_max: 15.670000000000005
  episode_reward_mean: 5.526943005181351
  episode_reward_min: -0.46000000000000024
  episodes_this_iter: 193
  episodes_total: 158520
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0449714347540615
          entropy_coeff: 0.01
          kl: 0.015179785129510176
          policy_loss: -0.0626048849705545
          total_loss: 0.10902765638344245
          vf_explained_var: 0.9456390142440796
          vf_loss: 0.15750080678344566
    num_agent_steps_sampled: 8226618
    num_agent_steps_trained: 8226618
    num_steps_sampled: 8226618
    num_steps_trained: 82

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,883,459752,8226618,5.52694,15.67,-0.46,51.9378


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8236614
  custom_metrics: {}
  date: 2021-11-24_02-40-17
  done: false
  episode_len_mean: 52.32984293193717
  episode_media: {}
  episode_reward_max: 15.620000000000006
  episode_reward_mean: 4.789005235602098
  episode_reward_min: -0.6900000000000004
  episodes_this_iter: 191
  episodes_total: 158711
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0928245465439486
          entropy_coeff: 0.01
          kl: 0.014472150916716808
          policy_loss: -0.06856849280345441
          total_loss: 0.08978438604235671
          vf_explained_var: 0.9361172318458557
          vf_loss: 0.14631175499121438
    num_agent_steps_sampled: 8236614
    num_agent_steps_trained: 8236614
    num_steps_sampled: 8236614
    num_steps_trained: 82

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,884,460286,8236614,4.78901,15.62,-0.69,52.3298




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8246610
  custom_metrics: {}
  date: 2021-11-24_02-49-28
  done: false
  episode_len_mean: 51.74611398963731
  episode_media: {}
  episode_reward_max: 17.540000000000003
  episode_reward_mean: 5.070207253886015
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 193
  episodes_total: 158904
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0785341149831873
          entropy_coeff: 0.01
          kl: 0.014856525899860279
          policy_loss: -0.06760249169578436
          total_loss: 0.08816621030995134
          vf_explained_var: 0.9438572525978088
          vf_loss: 0.14270901943240913
    num_agent_steps_sampled: 8246610
    num_agent_steps_trained: 8246610
    num_steps_sampled: 8246610
    num_steps_trained: 82

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,885,460837,8246610,5.07021,17.54,-0.51,51.7461




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8256606
  custom_metrics: {}
  date: 2021-11-24_02-58-40
  done: false
  episode_len_mean: 52.02590673575129
  episode_media: {}
  episode_reward_max: 17.739999999999995
  episode_reward_mean: 5.150673575129538
  episode_reward_min: -0.5900000000000003
  episodes_this_iter: 193
  episodes_total: 159097
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0675869919928203
          entropy_coeff: 0.01
          kl: 0.014922285580118487
          policy_loss: -0.06758769234770606
          total_loss: 0.07505572498257351
          vf_explained_var: 0.9333564639091492
          vf_loss: 0.12932445473555684
    num_agent_steps_sampled: 8256606
    num_agent_steps_trained: 8256606
    num_steps_sampled: 8256606
    num_steps_trained: 82

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,886,461389,8256606,5.15067,17.74,-0.59,52.0259




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8266602
  custom_metrics: {}
  date: 2021-11-24_03-07-48
  done: false
  episode_len_mean: 52.3717277486911
  episode_media: {}
  episode_reward_max: 13.700000000000005
  episode_reward_mean: 4.733350785340318
  episode_reward_min: -0.5600000000000003
  episodes_this_iter: 191
  episodes_total: 159288
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0870563813002714
          entropy_coeff: 0.01
          kl: 0.014752296984807471
          policy_loss: -0.06839750735818532
          total_loss: 0.09347088050970298
          vf_explained_var: 0.9312955737113953
          vf_loss: 0.14913137390541967
    num_agent_steps_sampled: 8266602
    num_agent_steps_trained: 8266602
    num_steps_sampled: 8266602
    num_steps_trained: 826

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,887,461937,8266602,4.73335,13.7,-0.56,52.3717




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8276598
  custom_metrics: {}
  date: 2021-11-24_03-17-19
  done: false
  episode_len_mean: 52.2565445026178
  episode_media: {}
  episode_reward_max: 15.550000000000008
  episode_reward_mean: 5.239947643979062
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 191
  episodes_total: 159479
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.065974163482467
          entropy_coeff: 0.01
          kl: 0.01603905904502645
          policy_loss: -0.06266374959468449
          total_loss: 0.11489586545528828
          vf_explained_var: 0.9252812266349792
          vf_loss: 0.16168037381296207
    num_agent_steps_sampled: 8276598
    num_agent_steps_trained: 8276598
    num_steps_sampled: 8276598
    num_steps_trained: 82765

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,888,462508,8276598,5.23995,15.55,-0.54,52.2565




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8286594
  custom_metrics: {}
  date: 2021-11-24_03-26-30
  done: false
  episode_len_mean: 52.82539682539682
  episode_media: {}
  episode_reward_max: 17.589999999999993
  episode_reward_mean: 5.483968253968258
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 189
  episodes_total: 159668
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0628036843964375
          entropy_coeff: 0.01
          kl: 0.01544146833071749
          policy_loss: -0.06761807422112742
          total_loss: 0.10668759297118395
          vf_explained_var: 0.9400898218154907
          vf_loss: 0.15975610781348792
    num_agent_steps_sampled: 8286594
    num_agent_steps_trained: 8286594
    num_steps_sampled: 8286594
    num_steps_trained: 82

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,889,463059,8286594,5.48397,17.59,-0.48,52.8254


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8296590
  custom_metrics: {}
  date: 2021-11-24_03-35-24
  done: false
  episode_len_mean: 52.804232804232804
  episode_media: {}
  episode_reward_max: 13.690000000000003
  episode_reward_mean: 5.346296296296299
  episode_reward_min: -0.5500000000000003
  episodes_this_iter: 189
  episodes_total: 159857
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0757375076592686
          entropy_coeff: 0.01
          kl: 0.014586318108677115
          policy_loss: -0.07574592879320342
          total_loss: 0.07377663038614446
          vf_explained_var: 0.9441472887992859
          vf_loss: 0.13705047713284077
    num_agent_steps_sampled: 8296590
    num_agent_steps_trained: 8296590
    num_steps_sampled: 8296590
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,890,463593,8296590,5.3463,13.69,-0.55,52.8042


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8306586
  custom_metrics: {}
  date: 2021-11-24_03-44-21
  done: false
  episode_len_mean: 52.208333333333336
  episode_media: {}
  episode_reward_max: 17.579999999999988
  episode_reward_mean: 5.034479166666671
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 192
  episodes_total: 160049
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.087058253819684
          entropy_coeff: 0.01
          kl: 0.014887528079246669
          policy_loss: -0.06423102736932111
          total_loss: 0.0941953894740102
          vf_explained_var: 0.9376851916313171
          vf_loss: 0.14538134809276726
    num_agent_steps_sampled: 8306586
    num_agent_steps_trained: 8306586
    num_steps_sampled: 8306586
    num_steps_trained: 83

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,891,464129,8306586,5.03448,17.58,-0.49,52.2083




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8316582
  custom_metrics: {}
  date: 2021-11-24_03-53-33
  done: false
  episode_len_mean: 52.9468085106383
  episode_media: {}
  episode_reward_max: 13.600000000000007
  episode_reward_mean: 5.425372340425537
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 188
  episodes_total: 160237
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0294455937113627
          entropy_coeff: 0.01
          kl: 0.014626817926881116
          policy_loss: -0.06294219025972801
          total_loss: 0.09394402076138056
          vf_explained_var: 0.9437656998634338
          vf_loss: 0.1438589462200666
    num_agent_steps_sampled: 8316582
    num_agent_steps_trained: 8316582
    num_steps_sampled: 8316582
    num_steps_trained: 8316

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,892,464682,8316582,5.42537,13.6,-0.51,52.9468




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8326578
  custom_metrics: {}
  date: 2021-11-24_04-02-41
  done: false
  episode_len_mean: 51.44615384615385
  episode_media: {}
  episode_reward_max: 15.690000000000005
  episode_reward_mean: 4.870358974358978
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 195
  episodes_total: 160432
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0460368870970713
          entropy_coeff: 0.01
          kl: 0.014241532336179566
          policy_loss: -0.07202767797158399
          total_loss: 0.07279407526954333
          vf_explained_var: 0.9430627822875977
          vf_loss: 0.13283812953795054
    num_agent_steps_sampled: 8326578
    num_agent_steps_trained: 8326578
    num_steps_sampled: 8326578
    num_steps_trained: 83

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,893,465230,8326578,4.87036,15.69,-0.51,51.4462


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8336574
  custom_metrics: {}
  date: 2021-11-24_04-11-37
  done: false
  episode_len_mean: 52.51578947368421
  episode_media: {}
  episode_reward_max: 15.600000000000007
  episode_reward_mean: 5.24447368421053
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 190
  episodes_total: 160622
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0521427211273147
          entropy_coeff: 0.01
          kl: 0.015139248608880796
          policy_loss: -0.06387589858843502
          total_loss: 0.11588800031484674
          vf_explained_var: 0.9376835227012634
          vf_loss: 0.1657962239089321
    num_agent_steps_sampled: 8336574
    num_agent_steps_trained: 8336574
    num_steps_sampled: 8336574
    num_steps_trained: 8336

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,894,465766,8336574,5.24447,15.6,-0.54,52.5158


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8346570
  custom_metrics: {}
  date: 2021-11-24_04-20-32
  done: false
  episode_len_mean: 52.57068062827225
  episode_media: {}
  episode_reward_max: 15.690000000000005
  episode_reward_mean: 4.86905759162304
  episode_reward_min: -0.6900000000000004
  episodes_this_iter: 191
  episodes_total: 160813
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0611148431119193
          entropy_coeff: 0.01
          kl: 0.014572134560544998
          policy_loss: -0.0647314886362978
          total_loss: 0.08505855094175435
          vf_explained_var: 0.9296025633811951
          vf_loss: 0.1372040439508369
    num_agent_steps_sampled: 8346570
    num_agent_steps_trained: 8346570
    num_steps_sampled: 8346570
    num_steps_trained: 83465

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,895,466301,8346570,4.86906,15.69,-0.69,52.5707




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8356566
  custom_metrics: {}
  date: 2021-11-24_04-29-54
  done: false
  episode_len_mean: 52.02094240837696
  episode_media: {}
  episode_reward_max: 15.630000000000008
  episode_reward_mean: 4.861780104712046
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 191
  episodes_total: 161004
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.034049080940614
          entropy_coeff: 0.01
          kl: 0.014803810431993232
          policy_loss: -0.06981757813894411
          total_loss: 0.07975312560295397
          vf_explained_var: 0.9333649277687073
          vf_loss: 0.1361862612130816
    num_agent_steps_sampled: 8356566
    num_agent_steps_trained: 8356566
    num_steps_sampled: 8356566
    num_steps_trained: 8356

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,896,466863,8356566,4.86178,15.63,-0.53,52.0209


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8366562
  custom_metrics: {}
  date: 2021-11-24_04-38-48
  done: false
  episode_len_mean: 52.182291666666664
  episode_media: {}
  episode_reward_max: 17.69999999999998
  episode_reward_mean: 4.982604166666671
  episode_reward_min: -0.5700000000000003
  episodes_this_iter: 192
  episodes_total: 161196
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.036697377402141
          entropy_coeff: 0.01
          kl: 0.014563574624459789
          policy_loss: -0.06872925473470382
          total_loss: 0.07593319241933902
          vf_explained_var: 0.9432000517845154
          vf_loss: 0.1318517752229255
    num_agent_steps_sampled: 8366562
    num_agent_steps_trained: 8366562
    num_steps_sampled: 8366562
    num_steps_trained: 8366

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,897,467396,8366562,4.9826,17.7,-0.57,52.1823




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8376558
  custom_metrics: {}
  date: 2021-11-24_04-47-55
  done: false
  episode_len_mean: 52.026041666666664
  episode_media: {}
  episode_reward_max: 15.490000000000007
  episode_reward_mean: 5.191875000000004
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 192
  episodes_total: 161388
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0446119396801454
          entropy_coeff: 0.01
          kl: 0.014970932179844135
          policy_loss: -0.06996543319282499
          total_loss: 0.08528095049194265
          vf_explained_var: 0.9281480312347412
          vf_loss: 0.14158684640466404
    num_agent_steps_sampled: 8376558
    num_agent_steps_trained: 8376558
    num_steps_sampled: 8376558
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,898,467944,8376558,5.19188,15.49,-0.52,52.026




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8386554
  custom_metrics: {}
  date: 2021-11-24_04-57-05
  done: false
  episode_len_mean: 51.670103092783506
  episode_media: {}
  episode_reward_max: 15.620000000000006
  episode_reward_mean: 4.817268041237117
  episode_reward_min: -0.6600000000000004
  episodes_this_iter: 194
  episodes_total: 161582
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0553966142326954
          entropy_coeff: 0.01
          kl: 0.014837210475747539
          policy_loss: -0.07100682219791353
          total_loss: 0.06609315144351022
          vf_explained_var: 0.9202849268913269
          vf_loss: 0.12385291799418553
    num_agent_steps_sampled: 8386554
    num_agent_steps_trained: 8386554
    num_steps_sampled: 8386554
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,899,468493,8386554,4.81727,15.62,-0.66,51.6701




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8396550
  custom_metrics: {}
  date: 2021-11-24_05-06-14
  done: false
  episode_len_mean: 52.1151832460733
  episode_media: {}
  episode_reward_max: 15.610000000000007
  episode_reward_mean: 4.958743455497387
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 191
  episodes_total: 161773
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0290179019472205
          entropy_coeff: 0.01
          kl: 0.014963705772463766
          policy_loss: -0.06649725025949463
          total_loss: 0.09454122453461748
          vf_explained_var: 0.938674807548523
          vf_loss: 0.14723945989621331
    num_agent_steps_sampled: 8396550
    num_agent_steps_trained: 8396550
    num_steps_sampled: 8396550
    num_steps_trained: 8396

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,900,469042,8396550,4.95874,15.61,-0.5,52.1152


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8406546
  custom_metrics: {}
  date: 2021-11-24_05-15-11
  done: false
  episode_len_mean: 51.31122448979592
  episode_media: {}
  episode_reward_max: 15.650000000000006
  episode_reward_mean: 5.567653061224496
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 196
  episodes_total: 161969
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0362877221471334
          entropy_coeff: 0.01
          kl: 0.015833057403392675
          policy_loss: -0.0675057446781696
          total_loss: 0.112504333685576
          vf_explained_var: 0.9301347136497498
          vf_loss: 0.1643032707731206
    num_agent_steps_sampled: 8406546
    num_agent_steps_trained: 8406546
    num_steps_sampled: 8406546
    num_steps_trained: 840654

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,901,469579,8406546,5.56765,15.65,-0.52,51.3112




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8416542
  custom_metrics: {}
  date: 2021-11-24_05-24-19
  done: false
  episode_len_mean: 51.597938144329895
  episode_media: {}
  episode_reward_max: 21.659999999999975
  episode_reward_mean: 5.2423711340206225
  episode_reward_min: -0.5700000000000003
  episodes_this_iter: 194
  episodes_total: 162163
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.036122931294652
          entropy_coeff: 0.01
          kl: 0.014592035130507096
          policy_loss: -0.06397340917463144
          total_loss: 0.1026333557089699
          vf_explained_var: 0.9310716390609741
          vf_loss: 0.1537255143675669
    num_agent_steps_sampled: 8416542
    num_agent_steps_trained: 8416542
    num_steps_sampled: 8416542
    num_steps_trained: 841

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,902,470128,8416542,5.24237,21.66,-0.57,51.5979




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8426538
  custom_metrics: {}
  date: 2021-11-24_05-33-46
  done: false
  episode_len_mean: 51.324742268041234
  episode_media: {}
  episode_reward_max: 15.560000000000008
  episode_reward_mean: 5.183608247422685
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 194
  episodes_total: 162357
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0315615175239534
          entropy_coeff: 0.01
          kl: 0.014487503912415541
          policy_loss: -0.06407489420016792
          total_loss: 0.09419209018645829
          vf_explained_var: 0.9440808296203613
          vf_loss: 0.14557825265757096
    num_agent_steps_sampled: 8426538
    num_agent_steps_trained: 8426538
    num_steps_sampled: 8426538
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,903,470694,8426538,5.18361,15.56,-0.51,51.3247




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8436534
  custom_metrics: {}
  date: 2021-11-24_05-42-55
  done: false
  episode_len_mean: 51.15384615384615
  episode_media: {}
  episode_reward_max: 19.599999999999984
  episode_reward_mean: 5.462820512820517
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 195
  episodes_total: 162552
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0253098645602843
          entropy_coeff: 0.01
          kl: 0.015270312656950934
          policy_loss: -0.06591668847980192
          total_loss: 0.09675079332659495
          vf_explained_var: 0.9328515529632568
          vf_loss: 0.14813289817172032
    num_agent_steps_sampled: 8436534
    num_agent_steps_trained: 8436534
    num_steps_sampled: 8436534
    num_steps_trained: 84

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,904,471244,8436534,5.46282,19.6,-0.54,51.1538




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8446530
  custom_metrics: {}
  date: 2021-11-24_05-52-14
  done: false
  episode_len_mean: 51.4
  episode_media: {}
  episode_reward_max: 17.659999999999997
  episode_reward_mean: 4.928307692307696
  episode_reward_min: -0.5500000000000003
  episodes_this_iter: 195
  episodes_total: 162747
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0495154293186695
          entropy_coeff: 0.01
          kl: 0.0149445599161773
          policy_loss: -0.06981786593146298
          total_loss: 0.09086635666100708
          vf_explained_var: 0.9352156519889832
          vf_loss: 0.14713380063077935
    num_agent_steps_sampled: 8446530
    num_agent_steps_trained: 8446530
    num_steps_sampled: 8446530
    num_steps_trained: 8446530
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,905,471802,8446530,4.92831,17.66,-0.55,51.4




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8456526
  custom_metrics: {}
  date: 2021-11-24_06-01-25
  done: false
  episode_len_mean: 51.391752577319586
  episode_media: {}
  episode_reward_max: 17.729999999999997
  episode_reward_mean: 5.375670103092787
  episode_reward_min: -0.48000000000000026
  episodes_this_iter: 194
  episodes_total: 162941
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0660909198374156
          entropy_coeff: 0.01
          kl: 0.013878776253028077
          policy_loss: -0.07039673593014256
          total_loss: 0.07819754284583508
          vf_explained_var: 0.9316385388374329
          vf_loss: 0.13763759964403796
    num_agent_steps_sampled: 8456526
    num_agent_steps_trained: 8456526
    num_steps_sampled: 8456526
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,906,472353,8456526,5.37567,17.73,-0.48,51.3918




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8466522
  custom_metrics: {}
  date: 2021-11-24_06-10-34
  done: false
  episode_len_mean: 51.6580310880829
  episode_media: {}
  episode_reward_max: 15.710000000000004
  episode_reward_mean: 5.1221243523316105
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 193
  episodes_total: 163134
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0465996999577825
          entropy_coeff: 0.01
          kl: 0.014574689657775391
          policy_loss: -0.06404805238364433
          total_loss: 0.08876624087289717
          vf_explained_var: 0.9369776248931885
          vf_loss: 0.140077325336291
    num_agent_steps_sampled: 8466522
    num_agent_steps_trained: 8466522
    num_steps_sampled: 8466522
    num_steps_trained: 846

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,907,472902,8466522,5.12212,15.71,-0.49,51.658




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8476518
  custom_metrics: {}
  date: 2021-11-24_06-19-41
  done: false
  episode_len_mean: 51.30102040816327
  episode_media: {}
  episode_reward_max: 15.560000000000008
  episode_reward_mean: 5.324591836734698
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 196
  episodes_total: 163330
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.035855432782307
          entropy_coeff: 0.01
          kl: 0.014797624182200096
          policy_loss: -0.06731525934433467
          total_loss: 0.07765007992876567
          vf_explained_var: 0.9444125890731812
          vf_loss: 0.13161305456626113
    num_agent_steps_sampled: 8476518
    num_agent_steps_trained: 8476518
    num_steps_sampled: 8476518
    num_steps_trained: 847

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,908,473449,8476518,5.32459,15.56,-0.51,51.301




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8486514
  custom_metrics: {}
  date: 2021-11-24_06-29-16
  done: false
  episode_len_mean: 50.568527918781726
  episode_media: {}
  episode_reward_max: 15.590000000000007
  episode_reward_mean: 5.488832487309648
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 197
  episodes_total: 163527
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.029497450805572
          entropy_coeff: 0.01
          kl: 0.015361268071247693
          policy_loss: -0.0623894967481833
          total_loss: 0.1024435690098296
          vf_explained_var: 0.9420347213745117
          vf_loss: 0.15013314962263657
    num_agent_steps_sampled: 8486514
    num_agent_steps_trained: 8486514
    num_steps_sampled: 8486514
    num_steps_trained: 8486

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,909,474024,8486514,5.48883,15.59,-0.54,50.5685


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8496510
  custom_metrics: {}
  date: 2021-11-24_06-38-12
  done: false
  episode_len_mean: 51.83419689119171
  episode_media: {}
  episode_reward_max: 17.66
  episode_reward_mean: 5.1110362694300555
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 193
  episodes_total: 163720
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.045618858801792
          entropy_coeff: 0.01
          kl: 0.014554221306229109
          policy_loss: -0.06700039052104995
          total_loss: 0.09749095122374657
          vf_explained_var: 0.9240342378616333
          vf_loss: 0.15179119318345913
    num_agent_steps_sampled: 8496510
    num_agent_steps_trained: 8496510
    num_steps_sampled: 8496510
    num_steps_trained: 8496510
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,910,474561,8496510,5.11104,17.66,-0.54,51.8342


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8506506
  custom_metrics: {}
  date: 2021-11-24_06-47-09
  done: false
  episode_len_mean: 51.402061855670105
  episode_media: {}
  episode_reward_max: 15.610000000000007
  episode_reward_mean: 5.2989175257732
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 194
  episodes_total: 163914
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.04966036332659
          entropy_coeff: 0.01
          kl: 0.015037211977164629
          policy_loss: -0.06621545661983694
          total_loss: 0.09250403803716434
          vf_explained_var: 0.9334193468093872
          vf_loss: 0.14495944842625125
    num_agent_steps_sampled: 8506506
    num_agent_steps_trained: 8506506
    num_steps_sampled: 8506506
    num_steps_trained: 85065

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,911,475097,8506506,5.29892,15.61,-0.52,51.4021




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8516502
  custom_metrics: {}
  date: 2021-11-24_06-56-33
  done: false
  episode_len_mean: 51.13265306122449
  episode_media: {}
  episode_reward_max: 15.620000000000006
  episode_reward_mean: 4.913928571428576
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 196
  episodes_total: 164110
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0500675396268146
          entropy_coeff: 0.01
          kl: 0.014289810542007524
          policy_loss: -0.06946153703728519
          total_loss: 0.08633458585600275
          vf_explained_var: 0.9350736737251282
          vf_loss: 0.1437428214850024
    num_agent_steps_sampled: 8516502
    num_agent_steps_trained: 8516502
    num_steps_sampled: 8516502
    num_steps_trained: 851

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,912,475661,8516502,4.91393,15.62,-0.5,51.1327




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8526498
  custom_metrics: {}
  date: 2021-11-24_07-05-42
  done: false
  episode_len_mean: 51.20103092783505
  episode_media: {}
  episode_reward_max: 13.630000000000006
  episode_reward_mean: 5.180567010309282
  episode_reward_min: -0.46000000000000024
  episodes_this_iter: 194
  episodes_total: 164304
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0349069148661143
          entropy_coeff: 0.01
          kl: 0.0157953863101394
          policy_loss: -0.0667014258044818
          total_loss: 0.09345419724759048
          vf_explained_var: 0.9463626742362976
          vf_loss: 0.14452082702186872
    num_agent_steps_sampled: 8526498
    num_agent_steps_trained: 8526498
    num_steps_sampled: 8526498
    num_steps_trained: 8526

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,913,476210,8526498,5.18057,13.63,-0.46,51.201




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8536494
  custom_metrics: {}
  date: 2021-11-24_07-15-37
  done: false
  episode_len_mean: 51.829896907216494
  episode_media: {}
  episode_reward_max: 15.520000000000008
  episode_reward_mean: 5.204175257731962
  episode_reward_min: -0.49000000000000016
  episodes_this_iter: 194
  episodes_total: 164498
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.014640753934661
          entropy_coeff: 0.01
          kl: 0.014726687867867535
          policy_loss: -0.06331588469950815
          total_loss: 0.10265424010577265
          vf_explained_var: 0.9485820531845093
          vf_loss: 0.15256729662740026
    num_agent_steps_sampled: 8536494
    num_agent_steps_trained: 8536494
    num_steps_sampled: 8536494
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,914,476805,8536494,5.20418,15.52,-0.49,51.8299




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8546490
  custom_metrics: {}
  date: 2021-11-24_07-24-58
  done: false
  episode_len_mean: 51.015306122448976
  episode_media: {}
  episode_reward_max: 15.630000000000006
  episode_reward_mean: 5.24571428571429
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 196
  episodes_total: 164694
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.038533856255941
          entropy_coeff: 0.01
          kl: 0.014231531003328737
          policy_loss: -0.06300385333177484
          total_loss: 0.09162802505887223
          vf_explained_var: 0.9282523393630981
          vf_loss: 0.14259600912951995
    num_agent_steps_sampled: 8546490
    num_agent_steps_trained: 8546490
    num_steps_sampled: 8546490
    num_steps_trained: 854

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,915,477366,8546490,5.24571,15.63,-0.52,51.0153




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8556486
  custom_metrics: {}
  date: 2021-11-24_07-34-09
  done: false
  episode_len_mean: 50.72222222222222
  episode_media: {}
  episode_reward_max: 13.720000000000004
  episode_reward_mean: 4.768939393939397
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 198
  episodes_total: 164892
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0222872132517726
          entropy_coeff: 0.01
          kl: 0.014664731698652591
          policy_loss: -0.060013748707685326
          total_loss: 0.08446985749264256
          vf_explained_var: 0.9456523656845093
          vf_loss: 0.13129838641628205
    num_agent_steps_sampled: 8556486
    num_agent_steps_trained: 8556486
    num_steps_sampled: 8556486
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,916,477917,8556486,4.76894,13.72,-0.54,50.7222




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8566482
  custom_metrics: {}
  date: 2021-11-24_07-43-27
  done: false
  episode_len_mean: 52.22105263157895
  episode_media: {}
  episode_reward_max: 15.580000000000007
  episode_reward_mean: 5.1187368421052675
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 190
  episodes_total: 165082
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0191791295047743
          entropy_coeff: 0.01
          kl: 0.014473185256148255
          policy_loss: -0.06774908538714665
          total_loss: 0.08974290687682182
          vf_explained_var: 0.9358574748039246
          vf_loss: 0.1447120577420471
    num_agent_steps_sampled: 8566482
    num_agent_steps_trained: 8566482
    num_steps_sampled: 8566482
    num_steps_trained: 85

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,917,478475,8566482,5.11874,15.58,-0.51,52.2211




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8576478
  custom_metrics: {}
  date: 2021-11-24_07-52-39
  done: false
  episode_len_mean: 51.11734693877551
  episode_media: {}
  episode_reward_max: 17.61999999999998
  episode_reward_mean: 4.865816326530616
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 196
  episodes_total: 165278
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.049158635460229
          entropy_coeff: 0.01
          kl: 0.01468464574071826
          policy_loss: -0.06720033253645176
          total_loss: 0.08711462281624913
          vf_explained_var: 0.9331007599830627
          vf_loss: 0.14135308264676166
    num_agent_steps_sampled: 8576478
    num_agent_steps_trained: 8576478
    num_steps_sampled: 8576478
    num_steps_trained: 85764

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,918,479027,8576478,4.86582,17.62,-0.51,51.1173




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8586474
  custom_metrics: {}
  date: 2021-11-24_08-02-06
  done: false
  episode_len_mean: 51.47422680412371
  episode_media: {}
  episode_reward_max: 15.660000000000005
  episode_reward_mean: 5.195979381443303
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 194
  episodes_total: 165472
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0572869626633135
          entropy_coeff: 0.01
          kl: 0.014649333326473998
          policy_loss: -0.06913758458923626
          total_loss: 0.08420600607796294
          vf_explained_var: 0.9434183835983276
          vf_loss: 0.14054344648628095
    num_agent_steps_sampled: 8586474
    num_agent_steps_trained: 8586474
    num_steps_sampled: 8586474
    num_steps_trained: 85

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,919,479594,8586474,5.19598,15.66,-0.54,51.4742




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8596470
  custom_metrics: {}
  date: 2021-11-24_08-11-14
  done: false
  episode_len_mean: 51.292307692307695
  episode_media: {}
  episode_reward_max: 15.510000000000009
  episode_reward_mean: 5.011692307692312
  episode_reward_min: -0.5500000000000003
  episodes_this_iter: 195
  episodes_total: 165667
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0465686106777574
          entropy_coeff: 0.01
          kl: 0.013627561198305693
          policy_loss: -0.06444419737361817
          total_loss: 0.0775127198867446
          vf_explained_var: 0.9523164629936218
          vf_loss: 0.13137731542577033
    num_agent_steps_sampled: 8596470
    num_agent_steps_trained: 8596470
    num_steps_sampled: 8596470
    num_steps_trained: 85

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,920,480142,8596470,5.01169,15.51,-0.55,51.2923


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8606466
  custom_metrics: {}
  date: 2021-11-24_08-20-11
  done: false
  episode_len_mean: 51.09230769230769
  episode_media: {}
  episode_reward_max: 13.540000000000008
  episode_reward_mean: 4.843846153846157
  episode_reward_min: -0.5400000000000003
  episodes_this_iter: 195
  episodes_total: 165862
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0703455401232924
          entropy_coeff: 0.01
          kl: 0.013750855244339923
          policy_loss: -0.05788500037301995
          total_loss: 0.08918355917860216
          vf_explained_var: 0.9503427743911743
          vf_loss: 0.13644584652464686
    num_agent_steps_sampled: 8606466
    num_agent_steps_trained: 8606466
    num_steps_sampled: 8606466
    num_steps_trained: 86

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,921,480678,8606466,4.84385,13.54,-0.54,51.0923




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8616462
  custom_metrics: {}
  date: 2021-11-24_08-29-35
  done: false
  episode_len_mean: 51.51794871794872
  episode_media: {}
  episode_reward_max: 13.690000000000005
  episode_reward_mean: 5.238461538461543
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 195
  episodes_total: 166057
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0557109115114174
          entropy_coeff: 0.01
          kl: 0.014549710396351946
          policy_loss: -0.0693264542755571
          total_loss: 0.07791465412053393
          vf_explained_var: 0.9499776363372803
          vf_loss: 0.13465215721603363
    num_agent_steps_sampled: 8616462
    num_agent_steps_trained: 8616462
    num_steps_sampled: 8616462
    num_steps_trained: 861

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,922,481242,8616462,5.23846,13.69,-0.5,51.5179


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8626458
  custom_metrics: {}
  date: 2021-11-24_08-38-30
  done: false
  episode_len_mean: 51.68556701030928
  episode_media: {}
  episode_reward_max: 15.530000000000008
  episode_reward_mean: 5.232680412371138
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 194
  episodes_total: 166251
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0607611806038393
          entropy_coeff: 0.01
          kl: 0.014492798461111008
          policy_loss: -0.06866607500397663
          total_loss: 0.08016958431737885
          vf_explained_var: 0.9260357618331909
          vf_loss: 0.13642686300099374
    num_agent_steps_sampled: 8626458
    num_agent_steps_trained: 8626458
    num_steps_sampled: 8626458
    num_steps_trained: 86

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,923,481777,8626458,5.23268,15.53,-0.5,51.6856




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8636454
  custom_metrics: {}
  date: 2021-11-24_08-47-55
  done: false
  episode_len_mean: 51.66321243523316
  episode_media: {}
  episode_reward_max: 13.680000000000007
  episode_reward_mean: 5.034093264248709
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 193
  episodes_total: 166444
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.036828400954664
          entropy_coeff: 0.01
          kl: 0.014451350171804775
          policy_loss: -0.07054575749245129
          total_loss: 0.06744211913566658
          vf_explained_var: 0.9383122324943542
          vf_loss: 0.1254341776883903
    num_agent_steps_sampled: 8636454
    num_agent_steps_trained: 8636454
    num_steps_sampled: 8636454
    num_steps_trained: 8636

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,924,482343,8636454,5.03409,13.68,-0.5,51.6632




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8646450
  custom_metrics: {}
  date: 2021-11-24_08-57-14
  done: false
  episode_len_mean: 51.90155440414508
  episode_media: {}
  episode_reward_max: 17.549999999999994
  episode_reward_mean: 5.486476683937828
  episode_reward_min: -0.46000000000000024
  episodes_this_iter: 193
  episodes_total: 166637
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.041433014592014
          entropy_coeff: 0.01
          kl: 0.015047141012118367
          policy_loss: -0.0702596405528859
          total_loss: 0.08416641784493754
          vf_explained_var: 0.9544991254806519
          vf_loss: 0.1405611194131604
    num_agent_steps_sampled: 8646450
    num_agent_steps_trained: 8646450
    num_steps_sampled: 8646450
    num_steps_trained: 8646

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,925,482902,8646450,5.48648,17.55,-0.46,51.9016


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8656446
  custom_metrics: {}
  date: 2021-11-24_09-06-14
  done: false
  episode_len_mean: 52.702127659574465
  episode_media: {}
  episode_reward_max: 17.51
  episode_reward_mean: 5.4465425531914935
  episode_reward_min: -0.4300000000000002
  episodes_this_iter: 188
  episodes_total: 166825
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.051307038202822
          entropy_coeff: 0.01
          kl: 0.014884141054086706
          policy_loss: -0.06661567018650878
          total_loss: 0.08675053569253995
          vf_explained_var: 0.9406800866127014
          vf_loss: 0.1399713419963153
    num_agent_steps_sampled: 8656446
    num_agent_steps_trained: 8656446
    num_steps_sampled: 8656446
    num_steps_trained: 8656446
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,926,483442,8656446,5.44654,17.51,-0.43,52.7021




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8666442
  custom_metrics: {}
  date: 2021-11-24_09-15-35
  done: false
  episode_len_mean: 53.12169312169312
  episode_media: {}
  episode_reward_max: 13.610000000000007
  episode_reward_mean: 5.178994708994713
  episode_reward_min: -0.6500000000000004
  episodes_this_iter: 189
  episodes_total: 167014
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0484565265686157
          entropy_coeff: 0.01
          kl: 0.014933308734971113
          policy_loss: -0.07117930504537948
          total_loss: 0.08728353745816814
          vf_explained_var: 0.9397360682487488
          vf_loss: 0.14492746199421344
    num_agent_steps_sampled: 8666442
    num_agent_steps_trained: 8666442
    num_steps_sampled: 8666442
    num_steps_trained: 86

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,927,484003,8666442,5.17899,13.61,-0.65,53.1217




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8676438
  custom_metrics: {}
  date: 2021-11-24_09-24-47
  done: false
  episode_len_mean: 53.58064516129032
  episode_media: {}
  episode_reward_max: 15.520000000000008
  episode_reward_mean: 5.158602150537639
  episode_reward_min: -0.5900000000000003
  episodes_this_iter: 186
  episodes_total: 167200
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0465075590763706
          entropy_coeff: 0.01
          kl: 0.014915408040421442
          policy_loss: -0.06885753989984196
          total_loss: 0.08473659081955337
          vf_explained_var: 0.9231622815132141
          vf_loss: 0.1400800419512231
    num_agent_steps_sampled: 8676438
    num_agent_steps_trained: 8676438
    num_steps_sampled: 8676438
    num_steps_trained: 867

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,928,484555,8676438,5.1586,15.52,-0.59,53.5806




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8686434
  custom_metrics: {}
  date: 2021-11-24_09-33-57
  done: false
  episode_len_mean: 53.66129032258065
  episode_media: {}
  episode_reward_max: 15.570000000000007
  episode_reward_mean: 5.351935483870972
  episode_reward_min: -0.5600000000000003
  episodes_this_iter: 186
  episodes_total: 167386
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0181697778194305
          entropy_coeff: 0.01
          kl: 0.015110607464650821
          policy_loss: -0.06186889727112021
          total_loss: 0.09674146885273699
          vf_explained_var: 0.9499325156211853
          vf_loss: 0.14436821063651317
    num_agent_steps_sampled: 8686434
    num_agent_steps_trained: 8686434
    num_steps_sampled: 8686434
    num_steps_trained: 86

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,929,485104,8686434,5.35194,15.57,-0.56,53.6613


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8696430
  custom_metrics: {}
  date: 2021-11-24_09-42-58
  done: false
  episode_len_mean: 53.80213903743316
  episode_media: {}
  episode_reward_max: 15.570000000000006
  episode_reward_mean: 5.3618716577540155
  episode_reward_min: -0.4600000000000004
  episodes_this_iter: 187
  episodes_total: 167573
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0327986125247066
          entropy_coeff: 0.01
          kl: 0.014851620795241188
          policy_loss: -0.06781251135814517
          total_loss: 0.08042769005815391
          vf_explained_var: 0.9403865337371826
          vf_loss: 0.13473433816579197
    num_agent_steps_sampled: 8696430
    num_agent_steps_trained: 8696430
    num_steps_sampled: 8696430
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,930,485646,8696430,5.36187,15.57,-0.46,53.8021




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8706426
  custom_metrics: {}
  date: 2021-11-24_09-52-12
  done: false
  episode_len_mean: 53.04761904761905
  episode_media: {}
  episode_reward_max: 15.640000000000006
  episode_reward_mean: 4.947671957671962
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 189
  episodes_total: 167762
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.058539793553601
          entropy_coeff: 0.01
          kl: 0.014418267582683043
          policy_loss: -0.06867907171269716
          total_loss: 0.08421755664613487
          vf_explained_var: 0.9401063323020935
          vf_loss: 0.14063541004714747
    num_agent_steps_sampled: 8706426
    num_agent_steps_trained: 8706426
    num_steps_sampled: 8706426
    num_steps_trained: 870

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,931,486199,8706426,4.94767,15.64,-0.5,53.0476




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8716422
  custom_metrics: {}
  date: 2021-11-24_10-01-25
  done: false
  episode_len_mean: 53.75675675675676
  episode_media: {}
  episode_reward_max: 15.480000000000008
  episode_reward_mean: 4.7461621621621655
  episode_reward_min: -0.5900000000000003
  episodes_this_iter: 185
  episodes_total: 167947
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.065685363107896
          entropy_coeff: 0.01
          kl: 0.014200560812869365
          policy_loss: -0.07177725074408851
          total_loss: 0.052053347042610026
          vf_explained_var: 0.9550884366035461
          vf_loss: 0.11213679758085679
    num_agent_steps_sampled: 8716422
    num_agent_steps_trained: 8716422
    num_steps_sampled: 8716422
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,932,486753,8716422,4.74616,15.48,-0.59,53.7568




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8726418
  custom_metrics: {}
  date: 2021-11-24_10-10-37
  done: false
  episode_len_mean: 51.86010362694301
  episode_media: {}
  episode_reward_max: 13.700000000000005
  episode_reward_mean: 5.261658031088087
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 193
  episodes_total: 168140
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.033414740064537
          entropy_coeff: 0.01
          kl: 0.015049322621831254
          policy_loss: -0.06790852050727705
          total_loss: 0.10274876439997675
          vf_explained_var: 0.928893506526947
          vf_loss: 0.1567071941634453
    num_agent_steps_sampled: 8726418
    num_agent_steps_trained: 8726418
    num_steps_sampled: 8726418
    num_steps_trained: 8726

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,933,487304,8726418,5.26166,13.7,-0.49,51.8601


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8736414
  custom_metrics: {}
  date: 2021-11-24_10-19-31
  done: false
  episode_len_mean: 53.22872340425532
  episode_media: {}
  episode_reward_max: 19.639999999999986
  episode_reward_mean: 5.684893617021282
  episode_reward_min: -0.47000000000000025
  episodes_this_iter: 188
  episodes_total: 168328
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.01036161083773
          entropy_coeff: 0.01
          kl: 0.015606693479719594
          policy_loss: -0.06744301604463164
          total_loss: 0.12241865087832692
          vf_explained_var: 0.9403895735740662
          vf_loss: 0.1744112839982153
    num_agent_steps_sampled: 8736414
    num_agent_steps_trained: 8736414
    num_steps_sampled: 8736414
    num_steps_trained: 8736

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,934,487838,8736414,5.68489,19.64,-0.47,53.2287




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8746410
  custom_metrics: {}
  date: 2021-11-24_10-28-38
  done: false
  episode_len_mean: 53.3048128342246
  episode_media: {}
  episode_reward_max: 13.660000000000004
  episode_reward_mean: 5.341336898395727
  episode_reward_min: -0.5800000000000003
  episodes_this_iter: 187
  episodes_total: 168515
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0549057632086267
          entropy_coeff: 0.01
          kl: 0.0151469578454486
          policy_loss: -0.06481853074537172
          total_loss: 0.09585807082571939
          vf_explained_var: 0.9358888864517212
          vf_loss: 0.14671899388005666
    num_agent_steps_sampled: 8746410
    num_agent_steps_trained: 8746410
    num_steps_sampled: 8746410
    num_steps_trained: 87464

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,935,488385,8746410,5.34134,13.66,-0.58,53.3048




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8756406
  custom_metrics: {}
  date: 2021-11-24_10-37-46
  done: false
  episode_len_mean: 53.04255319148936
  episode_media: {}
  episode_reward_max: 13.620000000000006
  episode_reward_mean: 4.950904255319153
  episode_reward_min: -0.5600000000000003
  episodes_this_iter: 188
  episodes_total: 168703
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0295978403235058
          entropy_coeff: 0.01
          kl: 0.014081008270327518
          policy_loss: -0.06722905196268887
          total_loss: 0.088271481577709
          vf_explained_var: 0.9331752061843872
          vf_loss: 0.14371821351843067
    num_agent_steps_sampled: 8756406
    num_agent_steps_trained: 8756406
    num_steps_sampled: 8756406
    num_steps_trained: 8756

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,936,488933,8756406,4.9509,13.62,-0.56,53.0426




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8766402
  custom_metrics: {}
  date: 2021-11-24_10-46-54
  done: false
  episode_len_mean: 52.8
  episode_media: {}
  episode_reward_max: 15.500000000000009
  episode_reward_mean: 5.235421052631583
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 190
  episodes_total: 168893
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0424254986895134
          entropy_coeff: 0.01
          kl: 0.013576294882225561
          policy_loss: -0.07117231621709377
          total_loss: 0.06473129271897773
          vf_explained_var: 0.9461368918418884
          vf_loss: 0.12539936761749557
    num_agent_steps_sampled: 8766402
    num_agent_steps_trained: 8766402
    num_steps_sampled: 8766402
    num_steps_trained: 8766402
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,937,489481,8766402,5.23542,15.5,-0.53,52.8


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8776398
  custom_metrics: {}
  date: 2021-11-24_10-55-46
  done: false
  episode_len_mean: 53.79144385026738
  episode_media: {}
  episode_reward_max: 15.660000000000005
  episode_reward_mean: 5.753636363636367
  episode_reward_min: -0.6100000000000003
  episodes_this_iter: 187
  episodes_total: 169080
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0108223009540374
          entropy_coeff: 0.01
          kl: 0.014741530993259878
          policy_loss: -0.06237090428088463
          total_loss: 0.10063434059550246
          vf_explained_var: 0.9577852487564087
          vf_loss: 0.14953041637361228
    num_agent_steps_sampled: 8776398
    num_agent_steps_trained: 8776398
    num_steps_sampled: 8776398
    num_steps_trained: 87

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,938,490013,8776398,5.75364,15.66,-0.61,53.7914




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8786394
  custom_metrics: {}
  date: 2021-11-24_11-04-52
  done: false
  episode_len_mean: 54.16847826086956
  episode_media: {}
  episode_reward_max: 15.530000000000008
  episode_reward_mean: 5.37239130434783
  episode_reward_min: -0.5800000000000003
  episodes_this_iter: 184
  episodes_total: 169264
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0202962168009884
          entropy_coeff: 0.01
          kl: 0.014469293503120166
          policy_loss: -0.06839719939594782
          total_loss: 0.09367054268477476
          vf_explained_var: 0.9401743412017822
          vf_loss: 0.14930784476689066
    num_agent_steps_sampled: 8786394
    num_agent_steps_trained: 8786394
    num_steps_sampled: 8786394
    num_steps_trained: 878

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,939,490559,8786394,5.37239,15.53,-0.58,54.1685




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8796390
  custom_metrics: {}
  date: 2021-11-24_11-13-57
  done: false
  episode_len_mean: 53.648648648648646
  episode_media: {}
  episode_reward_max: 15.630000000000006
  episode_reward_mean: 5.165837837837842
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 185
  episodes_total: 169449
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.029232506292412
          entropy_coeff: 0.01
          kl: 0.014903139971172291
          policy_loss: -0.06990559217753824
          total_loss: 0.10258941048882242
          vf_explained_var: 0.9347962141036987
          vf_loss: 0.1588361104149895
    num_agent_steps_sampled: 8796390
    num_agent_steps_trained: 8796390
    num_steps_sampled: 8796390
    num_steps_trained: 879

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,940,491104,8796390,5.16584,15.63,-0.51,53.6486


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8806386
  custom_metrics: {}
  date: 2021-11-24_11-22-50
  done: false
  episode_len_mean: 53.83870967741935
  episode_media: {}
  episode_reward_max: 19.53999999999997
  episode_reward_mean: 5.314946236559143
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 186
  episodes_total: 169635
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.040335991200673
          entropy_coeff: 0.01
          kl: 0.014742112410938368
          policy_loss: -0.0622197135543067
          total_loss: 0.11557726676629138
          vf_explained_var: 0.9375296831130981
          vf_loss: 0.16461596428715888
    num_agent_steps_sampled: 8806386
    num_agent_steps_trained: 8806386
    num_steps_sampled: 8806386
    num_steps_trained: 88063

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,941,491637,8806386,5.31495,19.54,-0.5,53.8387




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8816382
  custom_metrics: {}
  date: 2021-11-24_11-31-58
  done: false
  episode_len_mean: 53.5668449197861
  episode_media: {}
  episode_reward_max: 15.530000000000008
  episode_reward_mean: 5.266417112299469
  episode_reward_min: -0.5500000000000003
  episodes_this_iter: 187
  episodes_total: 169822
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0320816693296395
          entropy_coeff: 0.01
          kl: 0.015277546177560464
          policy_loss: -0.06451864817368827
          total_loss: 0.09244736248377497
          vf_explained_var: 0.9422403573989868
          vf_loss: 0.14248266633914178
    num_agent_steps_sampled: 8816382
    num_agent_steps_trained: 8816382
    num_steps_sampled: 8816382
    num_steps_trained: 881

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,942,492185,8816382,5.26642,15.53,-0.55,53.5668




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8826378
  custom_metrics: {}
  date: 2021-11-24_11-41-16
  done: false
  episode_len_mean: 54.13586956521739
  episode_media: {}
  episode_reward_max: 17.60000000000001
  episode_reward_mean: 4.9370108695652215
  episode_reward_min: -0.5600000000000003
  episodes_this_iter: 184
  episodes_total: 170006
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.056452109655702
          entropy_coeff: 0.01
          kl: 0.015266843939636602
          policy_loss: -0.06133005406777538
          total_loss: 0.09107489832435073
          vf_explained_var: 0.9520068764686584
          vf_loss: 0.13818969345365437
    num_agent_steps_sampled: 8826378
    num_agent_steps_trained: 8826378
    num_steps_sampled: 8826378
    num_steps_trained: 882

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,943,492743,8826378,4.93701,17.6,-0.56,54.1359




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8836374
  custom_metrics: {}
  date: 2021-11-24_11-50-20
  done: false
  episode_len_mean: 54.09139784946237
  episode_media: {}
  episode_reward_max: 15.680000000000005
  episode_reward_mean: 5.600376344086025
  episode_reward_min: -0.5700000000000003
  episodes_this_iter: 186
  episodes_total: 170192
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0316638193934797
          entropy_coeff: 0.01
          kl: 0.01443255762212209
          policy_loss: -0.06345716875887639
          total_loss: 0.10518507568755217
          vf_explained_var: 0.934119462966919
          vf_loss: 0.15607971147864114
    num_agent_steps_sampled: 8836374
    num_agent_steps_trained: 8836374
    num_steps_sampled: 8836374
    num_steps_trained: 8836

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,944,493287,8836374,5.60038,15.68,-0.57,54.0914


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8846370
  custom_metrics: {}
  date: 2021-11-24_11-59-11
  done: false
  episode_len_mean: 54.53551912568306
  episode_media: {}
  episode_reward_max: 15.590000000000005
  episode_reward_mean: 5.302568306010933
  episode_reward_min: -0.6500000000000004
  episodes_this_iter: 183
  episodes_total: 170375
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0291319528497365
          entropy_coeff: 0.01
          kl: 0.01416140419214594
          policy_loss: -0.06309903728795754
          total_loss: 0.08829672519666118
          vf_explained_var: 0.9324776530265808
          vf_loss: 0.13942563204332273
    num_agent_steps_sampled: 8846370
    num_agent_steps_trained: 8846370
    num_steps_sampled: 8846370
    num_steps_trained: 884

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,945,493818,8846370,5.30257,15.59,-0.65,54.5355




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8856366
  custom_metrics: {}
  date: 2021-11-24_12-08-31
  done: false
  episode_len_mean: 53.03191489361702
  episode_media: {}
  episode_reward_max: 13.490000000000007
  episode_reward_mean: 5.144574468085111
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 188
  episodes_total: 170563
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.067264111238311
          entropy_coeff: 0.01
          kl: 0.014719189716056538
          policy_loss: -0.06774465348127968
          total_loss: 0.09868789287723496
          vf_explained_var: 0.9321010708808899
          vf_loss: 0.15357303215764898
    num_agent_steps_sampled: 8856366
    num_agent_steps_trained: 8856366
    num_steps_sampled: 8856366
    num_steps_trained: 885

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,946,494378,8856366,5.14457,13.49,-0.5,53.0319




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8866362
  custom_metrics: {}
  date: 2021-11-24_12-17-36
  done: false
  episode_len_mean: 53.36363636363637
  episode_media: {}
  episode_reward_max: 13.560000000000006
  episode_reward_mean: 5.448983957219256
  episode_reward_min: -0.5500000000000003
  episodes_this_iter: 187
  episodes_total: 170750
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0389538509539333
          entropy_coeff: 0.01
          kl: 0.014218244363578781
          policy_loss: -0.06895107091125367
          total_loss: 0.0795991273492857
          vf_explained_var: 0.9484196305274963
          vf_loss: 0.13654879769843342
    num_agent_steps_sampled: 8866362
    num_agent_steps_trained: 8866362
    num_steps_sampled: 8866362
    num_steps_trained: 886

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,947,494923,8866362,5.44898,13.56,-0.55,53.3636




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8876358
  custom_metrics: {}
  date: 2021-11-24_12-26-56
  done: false
  episode_len_mean: 52.810526315789474
  episode_media: {}
  episode_reward_max: 15.700000000000003
  episode_reward_mean: 5.171684210526321
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 190
  episodes_total: 170940
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0331363565711134
          entropy_coeff: 0.01
          kl: 0.014191879337859759
          policy_loss: -0.06741329170129988
          total_loss: 0.07788590774151995
          vf_explained_var: 0.9343903064727783
          vf_loss: 0.13329968608356638
    num_agent_steps_sampled: 8876358
    num_agent_steps_trained: 8876358
    num_steps_sampled: 8876358
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,948,495483,8876358,5.17168,15.7,-0.51,52.8105




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8886354
  custom_metrics: {}
  date: 2021-11-24_12-36-05
  done: false
  episode_len_mean: 53.61290322580645
  episode_media: {}
  episode_reward_max: 19.599999999999973
  episode_reward_mean: 5.222311827956994
  episode_reward_min: -0.6900000000000004
  episodes_this_iter: 186
  episodes_total: 171126
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0251500253696517
          entropy_coeff: 0.01
          kl: 0.015102094012411048
          policy_loss: -0.06365987351945583
          total_loss: 0.09845351891355866
          vf_explained_var: 0.9455236792564392
          vf_loss: 0.1479604335364336
    num_agent_steps_sampled: 8886354
    num_agent_steps_trained: 8886354
    num_steps_sampled: 8886354
    num_steps_trained: 888

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,949,496032,8886354,5.22231,19.6,-0.69,53.6129


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8896350
  custom_metrics: {}
  date: 2021-11-24_12-44-59
  done: false
  episode_len_mean: 53.57219251336898
  episode_media: {}
  episode_reward_max: 15.580000000000007
  episode_reward_mean: 5.2352406417112345
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 187
  episodes_total: 171313
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.017543608788027
          entropy_coeff: 0.01
          kl: 0.014251580308066461
          policy_loss: -0.06749016875344817
          total_loss: 0.08314770061098029
          vf_explained_var: 0.948288083076477
          vf_loss: 0.13834642191692423
    num_agent_steps_sampled: 8896350
    num_agent_steps_trained: 8896350
    num_steps_sampled: 8896350
    num_steps_trained: 889

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,950,496566,8896350,5.23524,15.58,-0.51,53.5722




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8906346
  custom_metrics: {}
  date: 2021-11-24_12-54-07
  done: false
  episode_len_mean: 54.18478260869565
  episode_media: {}
  episode_reward_max: 13.530000000000008
  episode_reward_mean: 5.134347826086961
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 184
  episodes_total: 171497
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.058548657147281
          entropy_coeff: 0.01
          kl: 0.014362773993806324
          policy_loss: -0.06539698780619524
          total_loss: 0.0812699946819057
          vf_explained_var: 0.9348485469818115
          vf_loss: 0.13453227343770813
    num_agent_steps_sampled: 8906346
    num_agent_steps_trained: 8906346
    num_steps_sampled: 8906346
    num_steps_trained: 890

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,951,497113,8906346,5.13435,13.53,-0.49,54.1848




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8916342
  custom_metrics: {}
  date: 2021-11-24_13-03-16
  done: false
  episode_len_mean: 52.94179894179894
  episode_media: {}
  episode_reward_max: 15.670000000000007
  episode_reward_mean: 5.063121693121697
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 189
  episodes_total: 171686
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0368468397592445
          entropy_coeff: 0.01
          kl: 0.013692797902045322
          policy_loss: -0.06776632004120947
          total_loss: 0.06799779617311535
          vf_explained_var: 0.943159818649292
          vf_loss: 0.12493867767866267
    num_agent_steps_sampled: 8916342
    num_agent_steps_trained: 8916342
    num_steps_sampled: 8916342
    num_steps_trained: 891

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,952,497662,8916342,5.06312,15.67,-0.53,52.9418




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8926338
  custom_metrics: {}
  date: 2021-11-24_13-12-25
  done: false
  episode_len_mean: 52.957671957671955
  episode_media: {}
  episode_reward_max: 19.519999999999975
  episode_reward_mean: 5.61507936507937
  episode_reward_min: -0.6000000000000003
  episodes_this_iter: 189
  episodes_total: 171875
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.038629170617904
          entropy_coeff: 0.01
          kl: 0.016223078726621428
          policy_loss: -0.06562047803332977
          total_loss: 0.11066393745728405
          vf_explained_var: 0.9226809740066528
          vf_loss: 0.15971250420896313
    num_agent_steps_sampled: 8926338
    num_agent_steps_trained: 8926338
    num_steps_sampled: 8926338
    num_steps_trained: 892

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,953,498212,8926338,5.61508,19.52,-0.6,52.9577




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8936334
  custom_metrics: {}
  date: 2021-11-24_13-21-37
  done: false
  episode_len_mean: 53.44919786096256
  episode_media: {}
  episode_reward_max: 13.630000000000006
  episode_reward_mean: 5.300374331550806
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 187
  episodes_total: 172062
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.015439967098964
          entropy_coeff: 0.01
          kl: 0.01473071074508811
          policy_loss: -0.06168114215267476
          total_loss: 0.0979028529558089
          vf_explained_var: 0.9477227330207825
          vf_loss: 0.14617999341230406
    num_agent_steps_sampled: 8936334
    num_agent_steps_trained: 8936334
    num_steps_sampled: 8936334
    num_steps_trained: 89363

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,954,498764,8936334,5.30037,13.63,-0.53,53.4492


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8946330
  custom_metrics: {}
  date: 2021-11-24_13-30-31
  done: false
  episode_len_mean: 53.91935483870968
  episode_media: {}
  episode_reward_max: 19.629999999999995
  episode_reward_mean: 5.551666666666671
  episode_reward_min: -0.5000000000000002
  episodes_this_iter: 186
  episodes_total: 172248
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.023955290360623
          entropy_coeff: 0.01
          kl: 0.014900132115834826
          policy_loss: -0.06566527621181308
          total_loss: 0.09299833659849052
          vf_explained_var: 0.9424542784690857
          vf_loss: 0.14495880086167554
    num_agent_steps_sampled: 8946330
    num_agent_steps_trained: 8946330
    num_steps_sampled: 8946330
    num_steps_trained: 894

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,955,499298,8946330,5.55167,19.63,-0.5,53.9194


Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8956326
  custom_metrics: {}
  date: 2021-11-24_13-39-26
  done: false
  episode_len_mean: 53.543010752688176
  episode_media: {}
  episode_reward_max: 19.629999999999985
  episode_reward_mean: 5.417419354838714
  episode_reward_min: -0.5100000000000002
  episodes_this_iter: 186
  episodes_total: 172434
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.034759930554164
          entropy_coeff: 0.01
          kl: 0.014535413531985394
          policy_loss: -0.0649596277851472
          total_loss: 0.09961479039993427
          vf_explained_var: 0.9369463920593262
          vf_loss: 0.1518085277617823
    num_agent_steps_sampled: 8956326
    num_agent_steps_trained: 8956326
    num_steps_sampled: 8956326
    num_steps_trained: 8956

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,956,499833,8956326,5.41742,19.63,-0.51,53.543




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8966322
  custom_metrics: {}
  date: 2021-11-24_13-48-49
  done: false
  episode_len_mean: 52.42631578947368
  episode_media: {}
  episode_reward_max: 11.670000000000005
  episode_reward_mean: 5.15789473684211
  episode_reward_min: -0.49000000000000027
  episodes_this_iter: 190
  episodes_total: 172624
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0455632435748856
          entropy_coeff: 0.01
          kl: 0.014036878846392513
          policy_loss: -0.06693663153207262
          total_loss: 0.07707465990660053
          vf_explained_var: 0.9358654618263245
          vf_loss: 0.13248915745536366
    num_agent_steps_sampled: 8966322
    num_agent_steps_trained: 8966322
    num_steps_sampled: 8966322
    num_steps_trained: 89

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,957,500395,8966322,5.15789,11.67,-0.49,52.4263




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8976318
  custom_metrics: {}
  date: 2021-11-24_13-57-55
  done: false
  episode_len_mean: 52.82010582010582
  episode_media: {}
  episode_reward_max: 11.650000000000004
  episode_reward_mean: 5.110158730158734
  episode_reward_min: -0.5200000000000002
  episodes_this_iter: 189
  episodes_total: 172813
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.020756504095223
          entropy_coeff: 0.01
          kl: 0.014549847516608195
          policy_loss: -0.07067969550834931
          total_loss: 0.08266575221908619
          vf_explained_var: 0.9270918369293213
          vf_loss: 0.14040663986014254
    num_agent_steps_sampled: 8976318
    num_agent_steps_trained: 8976318
    num_steps_sampled: 8976318
    num_steps_trained: 897

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,958,500941,8976318,5.11016,11.65,-0.52,52.8201




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8986314
  custom_metrics: {}
  date: 2021-11-24_14-07-01
  done: false
  episode_len_mean: 52.705263157894734
  episode_media: {}
  episode_reward_max: 17.650000000000002
  episode_reward_mean: 5.366789473684215
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 190
  episodes_total: 173003
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0127820288081724
          entropy_coeff: 0.01
          kl: 0.01478681427860808
          policy_loss: -0.06469329855484844
          total_loss: 0.09053466777482995
          vf_explained_var: 0.9542861580848694
          vf_loss: 0.1416695748955146
    num_agent_steps_sampled: 8986314
    num_agent_steps_trained: 8986314
    num_steps_sampled: 8986314
    num_steps_trained: 898

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,959,501487,8986314,5.36679,17.65,-0.53,52.7053




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 8996310
  custom_metrics: {}
  date: 2021-11-24_14-16-33
  done: false
  episode_len_mean: 51.80927835051546
  episode_media: {}
  episode_reward_max: 17.539999999999996
  episode_reward_mean: 5.4713402061855705
  episode_reward_min: -0.6100000000000003
  episodes_this_iter: 194
  episodes_total: 173197
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0271983699386857
          entropy_coeff: 0.01
          kl: 0.01528316438796681
          policy_loss: -0.061604799133232864
          total_loss: 0.12709781099606132
          vf_explained_var: 0.9412603378295898
          vf_loss: 0.17415763409471655
    num_agent_steps_sampled: 8996310
    num_agent_steps_trained: 8996310
    num_steps_sampled: 8996310
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,960,502059,8996310,5.47134,17.54,-0.61,51.8093




Result for PPO_my_env_9f6d0_00000:
  agent_timesteps_total: 9006306
  custom_metrics: {}
  date: 2021-11-24_14-25-44
  done: false
  episode_len_mean: 52.192708333333336
  episode_media: {}
  episode_reward_max: 15.620000000000006
  episode_reward_mean: 5.4918229166666706
  episode_reward_min: -0.5300000000000002
  episodes_this_iter: 192
  episodes_total: 173389
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.0236422120567306
          entropy_coeff: 0.01
          kl: 0.014942644872522011
          policy_loss: -0.06531815659060763
          total_loss: 0.0981180763773822
          vf_explained_var: 0.9484711289405823
          vf_loss: 0.14963144206191342
    num_agent_steps_sampled: 9006306
    num_agent_steps_trained: 9006306
    num_steps_sampled: 9006306
    num_steps_trained: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_9f6d0_00000,RUNNING,192.168.3.5:133291,961,502611,9006306,5.49182,15.62,-0.53,52.1927


In [None]:
!l