In [1]:
import torch 
from torch import nn

import ray
from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

#from models import VisualEncoder
from train import *
from wrappers_2 import *



In [2]:
class VisualEncoder(nn.Module):
    """Convolutional encoder that flattens an image into a feature vector.

    Six kernel-2 / stride-2 convolutions, each followed by ELU, halve the
    spatial resolution at every stage.  A 3 x 64 x 64 input therefore ends up
    as a 512 x 1 x 1 map, i.e. 512 features per sample after ``nn.Flatten``.
    """

    def __init__(self):
        super().__init__()

        # Channel widths at the boundaries of the six convolution stages.
        widths = (3, 32, 32, 64, 128, 256, 512)

        stages = []
        for in_channels, out_channels in zip(widths[:-1], widths[1:]):
            stages.append(
                nn.Conv2d(in_channels, out_channels, kernel_size=2, stride=2, padding=0)
            )
            stages.append(nn.ELU())
        stages.append(nn.Flatten())

        # Stored as an ``nn.Sequential`` called ``cnn`` so that parameter
        # names stay ``cnn.0.weight``, ``cnn.2.weight``, ... .
        self.cnn = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the convolution stack and return the flat features."""
        features = self.cnn(x)
        return features

In [3]:
class PositionalEncoder(nn.Module):
    """Fixed sinusoidal positional code for every cell of a 9 x 11 x 11 grid.

    The ``d_model`` channels are split into three equal groups, one per grid
    axis (first axis of size 9, then the two axes of size 11).  Inside a
    group, even offsets hold ``sin`` and odd offsets hold ``cos`` of
    ``0.33 * position / 10_000 ** (6 * k / d_model)`` for
    ``k = 0 .. d_model // 6 - 1``.

    The table is stored as a non-persistent buffer named ``pe`` so that it
    follows the module through ``.to()`` / ``.cuda()`` without adding an
    entry to ``state_dict()``.

    Args:
        d_model: number of encoding channels; must be a multiple of 6 so that
            each of the three axes gets an even number of channels.

    Raises:
        ValueError: if ``d_model`` is not divisible by 6.
    """

    def __init__(self, d_model=6):
        super().__init__()
        self.d_model = d_model
        if self.d_model % 6 != 0:
            raise ValueError("d_model must be divisible by 6!")

        size_x, size_y, size_z = 9, 11, 11
        third = d_model // 3

        # Loop-invariant divisors: one per sin/cos pair inside an axis group.
        divisors = 10_000 ** (6 * np.arange(d_model // 6) / d_model)

        def phase(size):
            # (size, d_model // 6) table of angles for positions 0 .. size - 1.
            return 0.33 * np.arange(size)[:, None] / divisors

        phase_x, phase_y, phase_z = phase(size_x), phase(size_y), phase(size_z)

        pe = np.zeros((size_x, size_y, size_z, d_model))

        # Each axis code is broadcast over the two other grid axes.
        pe[..., 0:third:2] = np.sin(phase_x)[:, None, None, :]
        pe[..., 1:third:2] = np.cos(phase_x)[:, None, None, :]

        pe[..., third:2 * third:2] = np.sin(phase_y)[None, :, None, :]
        pe[..., third + 1:2 * third:2] = np.cos(phase_y)[None, :, None, :]

        pe[..., 2 * third::2] = np.sin(phase_z)[None, None, :, :]
        pe[..., 2 * third + 1::2] = np.cos(phase_z)[None, None, :, :]

        # One row per grid cell, flattened in (x, y, z) row-major order.
        pe = pe.reshape(size_x * size_y * size_z, d_model)
        self.register_buffer("pe", torch.tensor(pe).float(), persistent=False)

    def forward(self):
        """Return the ``(9 * 11 * 11, d_model)`` float32 encoding table.

        Takes no input: the table is returned as-is and is not added to or
        scaled against any feature tensor here.
        """
        return self.pe

In [4]:
class FusionNet(nn.Module):
    """Fuse target-grid features with image features into a 256-d vector.

    Pipeline (see ``forward``):
      1. the 512-d image vector is projected to 10 tokens of width ``d_model``;
      2. the 9 * 11 * 11 grid cells, used as tokens, cross-attend to the image
         tokens (residual connection);
      3. the grid tokens self-attend, with the positional table added to the
         queries and keys only (residual connection);
      4. a 3-D conv stack followed by a max-pool over the whole grid reduces
         the grid to 128 features;
      5. these are concatenated with a 128-d MLP embedding of the image
         vector and passed through a final MLP.

    Args:
        d_model: channel width of the grid / image tokens.  Must be accepted
            by ``PositionalEncoder`` and be divisible by ``num_heads``.
        num_heads: number of heads of both attention layers.
    """

    def __init__(self, d_model=6, num_heads=1):
        super().__init__()
        self.d_model = d_model
        # Learnable positional table, initialised from the sinusoidal code.
        self.pe = nn.Parameter(PositionalEncoder(d_model)())

        # 10 image tokens of width d_model (60 outputs for the default
        # d_model=6); tied to d_model so that other widths work as well.
        num_img_tokens = 10
        self.img_preproc = nn.Sequential(
            nn.Linear(512, num_img_tokens * d_model),
            nn.ELU(),
        )

        self.cross_attn = nn.MultiheadAttention(d_model, num_heads, batch_first=True)
        self.self_attn = nn.MultiheadAttention(d_model, num_heads, batch_first=True)

        # The first convolution consumes the d_model token channels.
        self.conv_net = nn.Sequential(
            nn.Conv3d(d_model, 8, kernel_size=3, padding=1),      # receptive field = 3
            nn.ELU(),
            nn.Conv3d(8, 16, kernel_size=3, padding=1),           # receptive field = 5
            nn.ELU(),
            nn.Conv3d(16, 32, kernel_size=3, padding=1),          # receptive field = 7
            nn.ELU(),
            nn.Conv3d(32, 64, kernel_size=3, padding=1),          # receptive field = 9
            nn.ELU(),
            nn.Conv3d(64, 128, kernel_size=3, padding=1),         # receptive field = 11
            nn.ELU(),
            # Pooling window equals the grid size: one value per channel.
            nn.MaxPool3d(kernel_size=(9, 11, 11))
        )

        self.img_mlp = nn.Sequential(
            nn.Linear(512, 256),
            nn.ELU(),
            nn.Linear(256, 128),
            nn.ELU(),
        )

        self.mlp = nn.Sequential(
            nn.Linear(128 + 128, 256),
            nn.ELU(),
            nn.Linear(256, 256),
            nn.ELU(),
        )

    def forward(self, target_features, img_features):
        """Compute the fused feature vector.

        Args:
            target_features: tensor laid out as ``(batch, d_model, 9, 11, 11)``.
                It is not modified (residual sums are out-of-place).
            img_features: tensor of shape ``(batch, 512)``.

        Returns:
            Tensor of shape ``(batch, 256)``.
        """
        batch_size = target_features.shape[0]

        img_tokens = self.img_preproc(img_features).reshape(batch_size, -1, self.d_model)
        # Channels last, then one token per grid cell.
        grid_tokens = target_features.permute(0, 2, 3, 4, 1).reshape(
            batch_size, 9 * 11 * 11, self.d_model
        )

        # Out-of-place residual adds: the reshape above can be a view of the
        # caller's tensor, so ``+=`` would write into the input.
        grid_tokens = grid_tokens + self.cross_attn(
            query=grid_tokens, key=img_tokens, value=img_tokens
        )[0]
        # Positional information goes into queries/keys only, not the values.
        k = q = grid_tokens + self.pe
        grid_tokens = grid_tokens + self.self_attn(query=q, key=k, value=grid_tokens)[0]

        # Back to channels-first grid layout for the 3-D convolutions.
        grid = grid_tokens.reshape(batch_size, 9, 11, 11, self.d_model).permute(0, 4, 1, 2, 3)
        grid_features = self.conv_net(grid).reshape(batch_size, -1)

        img_embedding = self.img_mlp(img_features)

        features = torch.cat([grid_features, img_embedding], dim=1)
        features = self.mlp(features)

        return features

In [5]:
# Sanity check: total number of elements over all parameters of a
# default-configured FusionNet (displayed as the cell output).
net = FusionNet()
sum(param.numel() for param in net.parameters())

628762

In [6]:
from torch.nn.functional import one_hot

class MyModelClass(TorchModelV2, nn.Module):
    """RLlib Torch model combining a visual encoder, a target-grid encoder and FusionNet.

    ``forward`` reads ``obs['pov']`` and ``obs['target_grid']`` from the input
    dict, produces action logits through ``action_head`` and caches the value
    estimate returned by ``value_function``.

    The visual encoder weights are loaded from a fixed checkpoint path and the
    encoder is evaluated under ``torch.no_grad()``, so no gradients flow into it.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)
        self.visual_encoder = VisualEncoder()
        # Checkpoint is loaded onto the CPU first; modules are moved below.
        self.visual_encoder.load_state_dict(
            torch.load("/IGLU-Minecraft/models/AngelaCNN/encoder_weigths.pth", map_location=torch.device('cpu'))
        )
        # 1x1x1 convolution: maps the 7 one-hot classes of each grid cell to 6 channels.
        self.target_encoder = nn.Sequential(
            nn.Conv3d(7, 6, kernel_size=1, stride=1, padding=0),
            nn.ELU(),
        )
        policy_hidden_dim = 256
        self.policy_network = FusionNet()

        self.action_head = nn.Linear(policy_hidden_dim, action_space.n)
        self.value_head = nn.Linear(policy_hidden_dim, 1)
        self.last_value = None

        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            self.visual_encoder.cuda()
            self.target_encoder.cuda()
            self.policy_network.cuda()
            self.action_head.cuda()
            self.value_head.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Return ``(action_logits, state)`` and cache the value estimate.

        ``state`` is passed through unchanged; ``seq_lens`` is not used.
        """
        obs = input_dict['obs']
        # Channels-first image scaled to [0, 1] (assuming 0..255 pixel values).
        pov = obs['pov'].permute(0, 3, 1, 2).float() / 255.0
        # One-hot encode the grid into 7 classes and move classes to dim 1.
        target = one_hot(obs['target_grid'].long(), num_classes=7).permute(0, 4, 1, 2, 3).float()

        # ``Tensor.cuda()`` / ``Tensor.to()`` return a new tensor rather than
        # moving in place, so the result has to be assigned.  Use the device
        # the parameters actually live on instead of assuming CUDA.
        device = next(self.parameters()).device
        pov = pov.to(device)
        target = target.to(device)

        # The visual encoder is used as a frozen feature extractor.
        with torch.no_grad():
            visual_features = self.visual_encoder(pov)

        target_features = self.target_encoder(target)

        features = self.policy_network(target_features, visual_features)

        action = self.action_head(features)
        self.last_value = self.value_head(features).squeeze(1)
        return action, state

    @override(TorchModelV2)
    def value_function(self):
        """Return the value estimate cached by the most recent ``forward`` call."""
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

In [7]:
# Register the model class under the name "my_torch_model"; the trainer
# config refers to it through its "custom_model" entry.
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [8]:
class VisualObservationWrapper(ObsWrapper):
    """Observation wrapper exposing pov, inventory, compass and optionally the target grid.

    Args:
        env: environment to wrap.
        include_target: when true, ``'target_grid'`` is part of both the
            declared observation space and the returned observation; when
            false it is part of neither, so the two always agree.
    """

    def __init__(self, env, include_target=False):
        super().__init__(env)
        self.include_target = include_target
        spaces = {
            'pov': gym.spaces.Box(low=0, high=255, shape=(64, 64, 3)),
            'inventory': gym.spaces.Box(low=0.0, high=20.0, shape=(6,)),
            'compass': gym.spaces.Box(low=-180.0, high=180.0, shape=(1,))
        }
        if include_target:
            spaces['target_grid'] = \
                gym.spaces.Box(low=0, high=6, shape=(9, 11, 11))
        self.observation_space = gym.spaces.Dict(spaces)

    def observation(self, obs, reward=None, done=None, info=None):
        """Convert a raw observation into the dict described by ``observation_space``.

        The target grid is taken from ``info['target_grid']`` (the key is then
        removed from ``info``).  If ``info`` is given without that key, the
        situation is logged, a reset is requested on the unwrapped env when it
        offers ``should_reset``, and the grid of the current task is used.
        Without ``info`` the grid of the current task is used directly.
        """
        if info is not None:
            if 'target_grid' in info:
                target_grid = info['target_grid']
                del info['target_grid']
            else:
                logger.error(f'info: {info}')
                if hasattr(self.unwrapped, 'should_reset'):
                    self.unwrapped.should_reset(True)
                target_grid = self.env.unwrapped.tasks.current.target_grid
        else:
            target_grid = self.env.unwrapped.tasks.current.target_grid

        observation = {
            'pov': obs['pov'].astype(np.float32),
            'inventory': obs['inventory'],
            'compass': np.array([obs['compass']['angle'].item()]),
        }
        # Only emit the key that the declared observation space contains.
        if self.include_target:
            observation['target_grid'] = target_grid
        return observation

In [9]:
import os
# CUDA environment variables; they are set before the training run below is launched.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0"

from iglu.tasks import CustomTasks
task_names = ['C3', 'C17', 'C32']
tasks = []
# Augmented data set: chats, target grids and target names are indexed in parallel below.
augmented_chats = np.load("data/augmented_chats.npy")
augmented_tasks = np.load("data/augmented_targets.npy")
augmented_target_names = np.load("data/augmented_target_name.npy")

for i in range(augmented_chats.shape[0]):
    # NOTE: `or True` makes this condition always true, so `task_names` has no
    # effect and every augmented entry is appended.
    if augmented_target_names[i] in task_names or True:
        # One task = (chat, target) pair.
        task = (augmented_chats[i], augmented_tasks[i])
        tasks.append(task)
print("{} tasks in total.".format(len(tasks)))
    
class RewardWrapper(gym.RewardWrapper):
    """Reward shaping: penalise zero-reward steps and shrink unit rewards.

    * a reward of exactly 0 becomes -0.01;
    * a reward of exactly +1 or -1 is divided by 10;
    * any other reward is passed through unchanged.
    """

    def __init__(self, env):
        super().__init__(env)

    def reward(self, rew):
        """Return the shaped version of ``rew``."""
        # Zero reward: small constant penalty, nothing else applies.
        if rew == 0:
            return -0.01

        # Unit-magnitude reward: scale down by a factor of ten.
        if abs(rew) == 1:
            rew /= 10

        return rew
    
def env_creator(env_config):
    """Create the fully wrapped 'IGLUSilentBuilder-v0' environment.

    ``env_config`` is accepted but not read.  The environment is limited to
    125 steps, uses the module-level ``tasks`` list as its task set and is
    wrapped, innermost first, by ``VisualObservationWrapper`` (with the target
    grid), ``SelectAndPlace``, ``Discretization`` and ``RewardWrapper``.
    """
    base_env = gym.make('IGLUSilentBuilder-v0', max_steps=125)
    base_env.update_taskset(CustomTasks(tasks))
    # Alternatives kept for reference:
    #   base_env.update_taskset(TaskSet(preset=['C3', 'C17', 'C32']))
    #   PovOnlyWrapper(base_env)

    with_target = VisualObservationWrapper(base_env, include_target=True)
    with_select = SelectAndPlace(with_target)
    discretized = Discretization(with_select, flat_action_space('human-level'))
    return RewardWrapper(discretized)

from ray.tune.registry import register_env
# Register the creator under "my_env", the name used as "env" in the trainer config.
register_env("my_env", env_creator)

from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

2850 tasks in total.


In [None]:
from ray.tune.integration.wandb import WandbLogger

# Launch PPO training on "my_env" with the custom model, logging to W&B and
# resuming from the checkpoint given in `restore`.
analysis = tune.run(
    PPOTrainer,
    config={
        "env": "my_env",
        "framework": "torch",
        "num_gpus": 1,
        "num_workers": 3,
        "sgd_minibatch_size": 60,
        "clip_param": 0.2,
        "entropy_coeff": 0.01,
        "lambda": 0.95,
        "train_batch_size": 5_000,
        # Not overridden here (previously tried values):
        #   "lr": 1e-4,
        #   "gamma": 0.99,
        "model": {
            # Custom model registered under this name with ModelCatalog.
            "custom_model": "my_torch_model",
            # Extra kwargs passed to the model's constructor (none).
            "custom_model_config": {},
        },
        "logger_config": {
            "wandb": {
                "project": "IGLU-Minecraft",
                "name": "PPO (AUG ALL) pretrained (visual pretrained AngelaCNN + CrossAttn 3)",
            },
        },
    },
    loggers=[WandbLogger],
    local_dir="/IGLU-Minecraft/checkpoints/all_tasks_aug_cross_attn3",
    # Checkpoint every 5 iterations and at the end; keep at most 100 of them.
    keep_checkpoints_num=100,
    checkpoint_freq=5,
    checkpoint_at_end=True,
    restore="/IGLU-Minecraft/checkpoints/3_tasks_aug_cross_attn3/PPO_2021-11-18_17-44-45/PPO_my_env_37c4c_00000_0_2021-11-18_17-44-45/checkpoint_000075/checkpoint-75",
)

Trial name,status,loc
PPO_my_env_ab24a_00000,PENDING,


2021-11-18 22:20:00,059	INFO trainable.py:76 -- Checkpoint size is 10703065 bytes
2021-11-18 22:20:00,069	INFO wandb.py:170 -- Already logged into W&B.
[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.6 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2m[36m(pid=101983)[0m 2021-11-18 22:20:03,556	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=101983)[0m 2021-11-18 22:20:03,556	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=101983)[0m 2021-11-18 22:20:44,891	INFO trainable.py:109 -- Trainable.setup took 43.845 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


Trial name,status,loc
PPO_my_env_ab24a_00000,RUNNING,


[2m[36m(pid=101983)[0m 2021-11-18 22:20:44,929	INFO trainable.py:383 -- Restored on 192.168.3.5 from checkpoint: /IGLU-Minecraft/checkpoints/all_tasks_aug_cross_attn3/PPO_2021-11-18_22-19-59/PPO_my_env_ab24a_00000_0_2021-11-18_22-20-00/tmpo2ph_14wrestore_from_object/checkpoint-75
[2m[36m(pid=101983)[0m 2021-11-18 22:20:44,929	INFO trainable.py:390 -- Current state after restoring: {'_iteration': 75, '_timesteps_total': None, '_time_total': 13971.266767501831, '_episodes_total': 1884}


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 159846
  custom_metrics: {}
  date: 2021-11-18_22-30-34
  done: false
  episode_len_mean: 53.38709677419355
  episode_media: {}
  episode_reward_max: 11.730000000000002
  episode_reward_mean: 1.6088709677419366
  episode_reward_min: -1.4100000000000004
  episodes_this_iter: 186
  episodes_total: 2070
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9532022974577294
          entropy_coeff: 0.01
          kl: 0.038833283949948574
          policy_loss: -0.012554831940756541
          total_loss: 0.586841287453373
          vf_explained_var: 0.6240488290786743
          vf_loss: 0.6111614864686108
    num_agent_steps_sampled: 159846
    num_agent_steps_trained: 159846
    num_steps_sampled: 159846
    num_steps_trained: 159846
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,76,14561.1,159846,1.60887,11.73,-1.41,53.3871




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 169842
  custom_metrics: {}
  date: 2021-11-18_22-39-32
  done: false
  episode_len_mean: 55.849162011173185
  episode_media: {}
  episode_reward_max: 9.680000000000003
  episode_reward_mean: 1.2478212290502801
  episode_reward_min: -1.4200000000000004
  episodes_this_iter: 179
  episodes_total: 2249
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.036359779087894
          entropy_coeff: 0.01
          kl: 0.028732883416054317
          policy_loss: -0.025777513589409533
          total_loss: 0.3970910580978851
          vf_explained_var: 0.5564389824867249
          vf_loss: 0.43461230346581425
    num_agent_steps_sampled: 169842
    num_agent_steps_trained: 169842
    num_steps_sampled: 169842
    num_steps_trained: 169842
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,77,15098.4,169842,1.24782,9.68,-1.42,55.8492


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 179838
  custom_metrics: {}
  date: 2021-11-18_22-48-14
  done: false
  episode_len_mean: 55.35911602209945
  episode_media: {}
  episode_reward_max: 9.720000000000004
  episode_reward_mean: 1.9670165745856372
  episode_reward_min: -1.1600000000000006
  episodes_this_iter: 181
  episodes_total: 2430
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.45000000000000007
          cur_lr: 5.000000000000001e-05
          entropy: 2.006329729710238
          entropy_coeff: 0.01
          kl: 0.02545284179714309
          policy_loss: -0.029569393495647633
          total_loss: 0.40919866186245984
          vf_explained_var: 0.5892396569252014
          vf_loss: 0.4473775730890981
    num_agent_steps_sampled: 179838
    num_agent_steps_trained: 179838
    num_steps_sampled: 179838
    num_steps_trained: 179838
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,78,15620.9,179838,1.96702,9.72,-1.16,55.3591




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 189834
  custom_metrics: {}
  date: 2021-11-18_22-57-19
  done: false
  episode_len_mean: 54.333333333333336
  episode_media: {}
  episode_reward_max: 11.840000000000002
  episode_reward_mean: 1.8154098360655753
  episode_reward_min: -1.1900000000000002
  episodes_this_iter: 183
  episodes_total: 2613
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 2.007105367537962
          entropy_coeff: 0.01
          kl: 0.022433403438128614
          policy_loss: -0.03074373633509135
          total_loss: 0.3270955482694204
          vf_explained_var: 0.5780500173568726
          vf_loss: 0.36276779036920326
    num_agent_steps_sampled: 189834
    num_agent_steps_trained: 189834
    num_steps_sampled: 189834
    num_steps_trained: 189834
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,79,16166,189834,1.81541,11.84,-1.19,54.3333


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 199830
  custom_metrics: {}
  date: 2021-11-18_23-06-03
  done: false
  episode_len_mean: 55.353591160220994
  episode_media: {}
  episode_reward_max: 13.630000000000004
  episode_reward_mean: 2.050939226519339
  episode_reward_min: -1.5400000000000005
  episodes_this_iter: 181
  episodes_total: 2794
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 2.056528451978921
          entropy_coeff: 0.01
          kl: 0.01937459087232402
          policy_loss: -0.02758696748061343
          total_loss: 0.3250884754011507
          vf_explained_var: 0.49230796098709106
          vf_loss: 0.35362395232966
    num_agent_steps_sampled: 199830
    num_agent_steps_trained: 199830
    num_steps_sampled: 199830
    num_steps_trained: 199830
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,80,16690.2,199830,2.05094,13.63,-1.54,55.3536




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 209826
  custom_metrics: {}
  date: 2021-11-18_23-15-06
  done: false
  episode_len_mean: 53.53763440860215
  episode_media: {}
  episode_reward_max: 11.290000000000004
  episode_reward_mean: 1.8777419354838725
  episode_reward_min: -1.3900000000000003
  episodes_this_iter: 186
  episodes_total: 2980
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 2.047312106928193
          entropy_coeff: 0.01
          kl: 0.018749353801040442
          policy_loss: -0.02946826264016862
          total_loss: 0.3232933225230212
          vf_explained_var: 0.5283754467964172
          vf_loss: 0.35425098457852433
    num_agent_steps_sampled: 209826
    num_agent_steps_trained: 209826
    num_steps_sampled: 209826
    num_steps_trained: 209826
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,81,17232.3,209826,1.87774,11.29,-1.39,53.5376


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 219822
  custom_metrics: {}
  date: 2021-11-18_23-23-54
  done: false
  episode_len_mean: 53.05820105820106
  episode_media: {}
  episode_reward_max: 9.710000000000003
  episode_reward_mean: 1.5287830687830704
  episode_reward_min: -1.2400000000000002
  episodes_this_iter: 189
  episodes_total: 3169
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 2.064307653066145
          entropy_coeff: 0.01
          kl: 0.018043557160259655
          policy_loss: -0.033351991099062804
          total_loss: 0.31701066776981
          vf_explained_var: 0.5444420576095581
          vf_loss: 0.35273663205418165
    num_agent_steps_sampled: 219822
    num_agent_steps_trained: 219822
    num_steps_sampled: 219822
    num_steps_trained: 219822
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,82,17760.8,219822,1.52878,9.71,-1.24,53.0582




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 229818
  custom_metrics: {}
  date: 2021-11-18_23-32-58
  done: false
  episode_len_mean: 52.204188481675395
  episode_media: {}
  episode_reward_max: 9.130000000000013
  episode_reward_mean: 1.9652356020942428
  episode_reward_min: -1.3800000000000006
  episodes_this_iter: 191
  episodes_total: 3360
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 2.0604035628368575
          entropy_coeff: 0.01
          kl: 0.018441900084007964
          policy_loss: -0.04189523018748986
          total_loss: 0.29221773889931174
          vf_explained_var: 0.632693350315094
          vf_loss: 0.3360445786473233
    num_agent_steps_sampled: 229818
    num_agent_steps_trained: 229818
    num_steps_sampled: 229818
    num_steps_trained: 229818
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,83,18304.3,229818,1.96524,9.13,-1.38,52.2042


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 239814
  custom_metrics: {}
  date: 2021-11-18_23-41-43
  done: false
  episode_len_mean: 52.375
  episode_media: {}
  episode_reward_max: 9.660000000000002
  episode_reward_mean: 2.1310937500000016
  episode_reward_min: -1.3300000000000005
  episodes_this_iter: 192
  episodes_total: 3552
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 2.14138939538634
          entropy_coeff: 0.01
          kl: 0.018834669596874572
          policy_loss: -0.03535059464863651
          total_loss: 0.3255577433416849
          vf_explained_var: 0.6026785969734192
          vf_loss: 0.36325212806111656
    num_agent_steps_sampled: 239814
    num_agent_steps_trained: 239814
    num_steps_sampled: 239814
    num_steps_trained: 239814
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,84,18829.6,239814,2.13109,9.66,-1.33,52.375




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 249810
  custom_metrics: {}
  date: 2021-11-18_23-50-46
  done: false
  episode_len_mean: 51.635416666666664
  episode_media: {}
  episode_reward_max: 9.640000000000006
  episode_reward_mean: 2.212343750000002
  episode_reward_min: -1.2500000000000004
  episodes_this_iter: 192
  episodes_total: 3744
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 2.1244007219272447
          entropy_coeff: 0.01
          kl: 0.018799551536624874
          policy_loss: -0.03785467167312981
          total_loss: 0.3435990112098427
          vf_explained_var: 0.6463435888290405
          vf_loss: 0.38366314463773704
    num_agent_steps_sampled: 249810
    num_agent_steps_trained: 249810
    num_steps_sampled: 249810
    num_steps_trained: 249810
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,85,19372.2,249810,2.21234,9.64,-1.25,51.6354


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 259806
  custom_metrics: {}
  date: 2021-11-18_23-59-37
  done: false
  episode_len_mean: 51.13775510204081
  episode_media: {}
  episode_reward_max: 11.490000000000004
  episode_reward_mean: 2.159897959183675
  episode_reward_min: -1.2200000000000002
  episodes_this_iter: 196
  episodes_total: 3940
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 2.116515885371281
          entropy_coeff: 0.01
          kl: 0.018067073818812895
          policy_loss: -0.034748803492106066
          total_loss: 0.31866275272092887
          vf_explained_var: 0.5907049179077148
          vf_loss: 0.35628380134858817
    num_agent_steps_sampled: 259806
    num_agent_steps_trained: 259806
    num_steps_sampled: 259806
    num_steps_trained: 259806
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,86,19903.2,259806,2.1599,11.49,-1.22,51.1378




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 269802
  custom_metrics: {}
  date: 2021-11-19_00-09-00
  done: false
  episode_len_mean: 49.84577114427861
  episode_media: {}
  episode_reward_max: 11.510000000000005
  episode_reward_mean: 2.0262189054726383
  episode_reward_min: -1.4800000000000004
  episodes_this_iter: 201
  episodes_total: 4141
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 2.109007846184045
          entropy_coeff: 0.01
          kl: 0.018043522628879
          policy_loss: -0.033804731611454336
          total_loss: 0.274707151898154
          vf_explained_var: 0.6039535403251648
          vf_loss: 0.3113328947984879
    num_agent_steps_sampled: 269802
    num_agent_steps_trained: 269802
    num_steps_sampled: 269802
    num_steps_trained: 269802
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,87,20466.5,269802,2.02622,11.51,-1.48,49.8458


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 279798
  custom_metrics: {}
  date: 2021-11-19_00-17-46
  done: false
  episode_len_mean: 52.135416666666664
  episode_media: {}
  episode_reward_max: 11.360000000000007
  episode_reward_mean: 2.2168229166666684
  episode_reward_min: -1.3200000000000003
  episodes_this_iter: 192
  episodes_total: 4333
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 2.10813783340186
          entropy_coeff: 0.01
          kl: 0.017481179917641906
          policy_loss: -0.04048486128353596
          total_loss: 0.25676963637937344
          vf_explained_var: 0.6538762450218201
          vf_loss: 0.3006361790707365
    num_agent_steps_sampled: 279798
    num_agent_steps_trained: 279798
    num_steps_sampled: 279798
    num_steps_trained: 279798
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,88,20992.5,279798,2.21682,11.36,-1.32,52.1354


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 289794
  custom_metrics: {}
  date: 2021-11-19_00-26-37
  done: false
  episode_len_mean: 51.54123711340206
  episode_media: {}
  episode_reward_max: 17.119999999999987
  episode_reward_mean: 2.5147938144329913
  episode_reward_min: -1.1900000000000002
  episodes_this_iter: 194
  episodes_total: 4527
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 2.118730873826996
          entropy_coeff: 0.01
          kl: 0.01948271589424593
          policy_loss: -0.03383464088094999
          total_loss: 0.3367769025755559
          vf_explained_var: 0.6466079354286194
          vf_loss: 0.3720726009716381
    num_agent_steps_sampled: 289794
    num_agent_steps_trained: 289794
    num_steps_sampled: 289794
    num_steps_trained: 289794
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,89,21523.5,289794,2.51479,17.12,-1.19,51.5412




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 299790
  custom_metrics: {}
  date: 2021-11-19_00-35-42
  done: false
  episode_len_mean: 51.689119170984455
  episode_media: {}
  episode_reward_max: 9.260000000000009
  episode_reward_mean: 2.0048186528497425
  episode_reward_min: -1.3800000000000003
  episodes_this_iter: 193
  episodes_total: 4720
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 2.1354408805868235
          entropy_coeff: 0.01
          kl: 0.018330106358572316
          policy_loss: -0.04345627858668808
          total_loss: 0.28776632504841215
          vf_explained_var: 0.6315305233001709
          vf_loss: 0.33401777865028526
    num_agent_steps_sampled: 299790
    num_agent_steps_trained: 299790
    num_steps_sampled: 299790
    num_steps_trained: 299790
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,90,22068.2,299790,2.00482,9.26,-1.38,51.6891




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 309786
  custom_metrics: {}
  date: 2021-11-19_00-44-40
  done: false
  episode_len_mean: 52.642105263157895
  episode_media: {}
  episode_reward_max: 11.600000000000005
  episode_reward_mean: 2.238263157894739
  episode_reward_min: -1.4100000000000006
  episodes_this_iter: 190
  episodes_total: 4910
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 2.1027946810885125
          entropy_coeff: 0.01
          kl: 0.0183462175277999
          policy_loss: -0.03932151315962238
          total_loss: 0.2886414238214164
          vf_explained_var: 0.6551259756088257
          vf_loss: 0.3304153386099511
    num_agent_steps_sampled: 309786
    num_agent_steps_trained: 309786
    num_steps_sampled: 309786
    num_steps_trained: 309786
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,91,22606.2,309786,2.23826,11.6,-1.41,52.6421


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 319782
  custom_metrics: {}
  date: 2021-11-19_00-53-27
  done: false
  episode_len_mean: 52.463157894736845
  episode_media: {}
  episode_reward_max: 15.510000000000003
  episode_reward_mean: 2.9100526315789494
  episode_reward_min: -1.2600000000000002
  episodes_this_iter: 190
  episodes_total: 5100
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 2.1044199504526744
          entropy_coeff: 0.01
          kl: 0.01994798069329155
          policy_loss: -0.03973337729979808
          total_loss: 0.2948696993702791
          vf_explained_var: 0.7235881090164185
          vf_loss: 0.33544994432490366
    num_agent_steps_sampled: 319782
    num_agent_steps_trained: 319782
    num_steps_sampled: 319782
    num_steps_trained: 319782
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,92,23133.3,319782,2.91005,15.51,-1.26,52.4632


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 329778
  custom_metrics: {}
  date: 2021-11-19_01-02-16
  done: false
  episode_len_mean: 51.76288659793814
  episode_media: {}
  episode_reward_max: 13.470000000000004
  episode_reward_mean: 2.114123711340208
  episode_reward_min: -1.2800000000000002
  episodes_this_iter: 194
  episodes_total: 5294
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 2.1419399708868507
          entropy_coeff: 0.01
          kl: 0.018991212961536535
          policy_loss: -0.04058729334617848
          total_loss: 0.2327289716872959
          vf_explained_var: 0.7003730535507202
          vf_loss: 0.27550706072826403
    num_agent_steps_sampled: 329778
    num_agent_steps_trained: 329778
    num_steps_sampled: 329778
    num_steps_trained: 329778
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,93,23662.3,329778,2.11412,13.47,-1.28,51.7629




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 339774
  custom_metrics: {}
  date: 2021-11-19_01-11-19
  done: false
  episode_len_mean: 50.474747474747474
  episode_media: {}
  episode_reward_max: 13.710000000000004
  episode_reward_mean: 2.60318181818182
  episode_reward_min: -1.4000000000000004
  episodes_this_iter: 198
  episodes_total: 5492
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 2.1268771765461887
          entropy_coeff: 0.01
          kl: 0.019603763344010147
          policy_loss: -0.038238153082100504
          total_loss: 0.2860275623688481
          vf_explained_var: 0.6839451789855957
          vf_loss: 0.32568567594350595
    num_agent_steps_sampled: 339774
    num_agent_steps_trained: 339774
    num_steps_sampled: 339774
    num_steps_trained: 339774
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,94,24205,339774,2.60318,13.71,-1.4,50.4747


[34m[1mwandb[0m: Network error (ReadTimeout), entering retry loop.
[34m[1mwandb[0m: Network error resolved after 0:03:25.791997, resuming normal operation.
[34m[1mwandb[0m: 500 encountered ({"error":"Error 1040: Too many connections"}), retrying request
[34m[1mwandb[0m: Network error resolved after 0:01:38.887292, resuming normal operation.


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 349770
  custom_metrics: {}
  date: 2021-11-19_01-20-39
  done: false
  episode_len_mean: 51.07142857142857
  episode_media: {}
  episode_reward_max: 13.550000000000006
  episode_reward_mean: 2.4663775510204102
  episode_reward_min: -1.4200000000000004
  episodes_this_iter: 196
  episodes_total: 5688
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 2.1335576326971553
          entropy_coeff: 0.01
          kl: 0.020200070130574344
          policy_loss: -0.04220770990777453
          total_loss: 0.255559836378862
          vf_explained_var: 0.7146111726760864
          vf_loss: 0.2986505511018586
    num_agent_steps_sampled: 349770
    num_agent_steps_trained: 349770
    num_steps_sampled: 349770
    num_steps_trained: 349770
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,95,24765.7,349770,2.46638,13.55,-1.42,51.0714


[34m[1mwandb[0m: Network error (ReadTimeout), entering retry loop.
[34m[1mwandb[0m: Network error resolved after 0:05:37.093084, resuming normal operation.


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 359766
  custom_metrics: {}
  date: 2021-11-19_01-29-29
  done: false
  episode_len_mean: 50.421319796954315
  episode_media: {}
  episode_reward_max: 13.600000000000005
  episode_reward_mean: 2.7746192893401034
  episode_reward_min: -1.6200000000000008
  episodes_this_iter: 197
  episodes_total: 5885
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000003
          cur_lr: 5.000000000000001e-05
          entropy: 2.1181244726401256
          entropy_coeff: 0.01
          kl: 0.017344910571450517
          policy_loss: -0.038637270700027176
          total_loss: 0.3223237641647139
          vf_explained_var: 0.695525586605072
          vf_loss: 0.3557996961389141
    num_agent_steps_sampled: 359766
    num_agent_steps_trained: 359766
    num_steps_sampled: 359766
    num_steps_trained: 359766
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,96,25295.6,359766,2.77462,13.6,-1.62,50.4213


[34m[1mwandb[0m: [32m[41mERROR[0m Error while calling W&B API: Error 1040: Too many connections (<Response [500]>)
[34m[1mwandb[0m: 500 encountered ({"errors":[{"message":"Error 1040: Too many connections","path":["project"]}],"data":{"project":null}}), retrying request
[34m[1mwandb[0m: Network error resolved after 0:00:58.750039, resuming normal operation.
[34m[1mwandb[0m: Network error (ReadTimeout), entering retry loop.
[34m[1mwandb[0m: Network error resolved after 0:00:41.190007, resuming normal operation.


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 369762
  custom_metrics: {}
  date: 2021-11-19_01-38-30
  done: false
  episode_len_mean: 52.27748691099477
  episode_media: {}
  episode_reward_max: 11.620000000000005
  episode_reward_mean: 2.4315706806282744
  episode_reward_min: -1.5800000000000007
  episodes_this_iter: 191
  episodes_total: 6076
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000003
          cur_lr: 5.000000000000001e-05
          entropy: 2.114718305609791
          entropy_coeff: 0.01
          kl: 0.01629025553219718
          policy_loss: -0.04816527342189085
          total_loss: 0.21918457253923115
          vf_explained_var: 0.7068532705307007
          vf_loss: 0.2637562039878638
    num_agent_steps_sampled: 369762
    num_agent_steps_trained: 369762
    num_steps_sampled: 369762
    num_steps_trained: 369762
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,97,25836.2,369762,2.43157,11.62,-1.58,52.2775


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 379758
  custom_metrics: {}
  date: 2021-11-19_01-47-20
  done: false
  episode_len_mean: 51.3948717948718
  episode_media: {}
  episode_reward_max: 15.430000000000007
  episode_reward_mean: 2.6957435897435924
  episode_reward_min: -1.2500000000000004
  episodes_this_iter: 195
  episodes_total: 6271
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000003
          cur_lr: 5.000000000000001e-05
          entropy: 2.11464097463941
          entropy_coeff: 0.01
          kl: 0.01721488290444548
          policy_loss: -0.04245764859411303
          total_loss: 0.28601943819634484
          vf_explained_var: 0.705565333366394
          vf_loss: 0.32347839473967394
    num_agent_steps_sampled: 379758
    num_agent_steps_trained: 379758
    num_steps_sampled: 379758
    num_steps_trained: 379758
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,98,26365.8,379758,2.69574,15.43,-1.25,51.3949


[34m[1mwandb[0m: Network error (ReadTimeout), entering retry loop.
[34m[1mwandb[0m: Network error resolved after 0:00:39.600672, resuming normal operation.
[34m[1mwandb[0m: 500 encountered ({"error":"Error 1135: Can't create a new thread (errno 11); if you are not out of available memory, you can consult the manual for a possible OS-dependent bug"}), retrying request
[34m[1mwandb[0m: Network error resolved after 0:00:33.278162, resuming normal operation.


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 389754
  custom_metrics: {}
  date: 2021-11-19_01-56-21
  done: false
  episode_len_mean: 50.43939393939394
  episode_media: {}
  episode_reward_max: 13.520000000000005
  episode_reward_mean: 2.6648989898989917
  episode_reward_min: -1.3500000000000008
  episodes_this_iter: 198
  episodes_total: 6469
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000003
          cur_lr: 5.000000000000001e-05
          entropy: 2.1252256998096604
          entropy_coeff: 0.01
          kl: 0.017240116065177905
          policy_loss: -0.044595019281630253
          total_loss: 0.30599130257762747
          vf_explained_var: 0.6922962665557861
          vf_loss: 0.3456551515887656
    num_agent_steps_sampled: 389754
    num_agent_steps_trained: 389754
    num_steps_sampled: 389754
    num_steps_trained: 389754


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,99,26907.2,389754,2.6649,13.52,-1.35,50.4394




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 399750
  custom_metrics: {}
  date: 2021-11-19_02-05-42
  done: false
  episode_len_mean: 49.925
  episode_media: {}
  episode_reward_max: 11.670000000000005
  episode_reward_mean: 2.374950000000002
  episode_reward_min: -1.5400000000000007
  episodes_this_iter: 200
  episodes_total: 6669
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000003
          cur_lr: 5.000000000000001e-05
          entropy: 2.1224945125809636
          entropy_coeff: 0.01
          kl: 0.015978475212125153
          policy_loss: -0.047905664938774586
          total_loss: 0.2034526586458226
          vf_explained_var: 0.7365640997886658
          vf_loss: 0.24831595988869368
    num_agent_steps_sampled: 399750
    num_agent_steps_trained: 399750
    num_steps_sampled: 399750
    num_steps_trained: 399750
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,100,27467.8,399750,2.37495,11.67,-1.54,49.925




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 409746
  custom_metrics: {}
  date: 2021-11-19_02-15-17
  done: false
  episode_len_mean: 49.835
  episode_media: {}
  episode_reward_max: 9.730000000000004
  episode_reward_mean: 2.543100000000002
  episode_reward_min: -1.4100000000000004
  episodes_this_iter: 200
  episodes_total: 6869
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000003
          cur_lr: 5.000000000000001e-05
          entropy: 2.1109789922294846
          entropy_coeff: 0.01
          kl: 0.01630133136733677
          policy_loss: -0.048107423643558425
          total_loss: 0.22996303783535246
          vf_explained_var: 0.7589372396469116
          vf_loss: 0.27442260381717043
    num_agent_steps_sampled: 409746
    num_agent_steps_trained: 409746
    num_steps_sampled: 409746
    num_steps_trained: 409746
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,101,28042.9,409746,2.5431,9.73,-1.41,49.835


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 419742
  custom_metrics: {}
  date: 2021-11-19_02-24-16
  done: false
  episode_len_mean: 49.950248756218905
  episode_media: {}
  episode_reward_max: 11.610000000000005
  episode_reward_mean: 2.7194029850746286
  episode_reward_min: -1.5700000000000007
  episodes_this_iter: 201
  episodes_total: 7070
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000003
          cur_lr: 5.000000000000001e-05
          entropy: 2.101180482006456
          entropy_coeff: 0.01
          kl: 0.016528843087667378
          policy_loss: -0.04643746427152573
          total_loss: 0.2533761336186385
          vf_explained_var: 0.7308608293533325
          vf_loss: 0.2957222229262522
    num_agent_steps_sampled: 419742
    num_agent_steps_trained: 419742
    num_steps_sampled: 419742
    num_steps_trained: 419742
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,102,28582.2,419742,2.7194,11.61,-1.57,49.9502




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 429738
  custom_metrics: {}
  date: 2021-11-19_02-33-25
  done: false
  episode_len_mean: 50.35353535353536
  episode_media: {}
  episode_reward_max: 11.520000000000005
  episode_reward_mean: 2.6353030303030325
  episode_reward_min: -1.6300000000000008
  episodes_this_iter: 198
  episodes_total: 7268
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000003
          cur_lr: 5.000000000000001e-05
          entropy: 2.093954307367524
          entropy_coeff: 0.01
          kl: 0.016997459596350704
          policy_loss: -0.050104112619918664
          total_loss: 0.20647217916676383
          vf_explained_var: 0.7146643400192261
          vf_loss: 0.2517009448783704
    num_agent_steps_sampled: 429738
    num_agent_steps_trained: 429738
    num_steps_sampled: 429738
    num_steps_trained: 429738
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,103,29130.9,429738,2.6353,11.52,-1.63,50.3535




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 439734
  custom_metrics: {}
  date: 2021-11-19_02-42-33
  done: false
  episode_len_mean: 50.707070707070706
  episode_media: {}
  episode_reward_max: 11.740000000000004
  episode_reward_mean: 2.7489393939393962
  episode_reward_min: -1.6700000000000006
  episodes_this_iter: 198
  episodes_total: 7466
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000003
          cur_lr: 5.000000000000001e-05
          entropy: 2.080502146147341
          entropy_coeff: 0.01
          kl: 0.016043996423119683
          policy_loss: -0.04764196864261136
          total_loss: 0.20788929754755708
          vf_explained_var: 0.7304216027259827
          vf_loss: 0.2519694689070305
    num_agent_steps_sampled: 439734
    num_agent_steps_trained: 439734
    num_steps_sampled: 439734
    num_steps_trained: 439734
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,104,29678.8,439734,2.74894,11.74,-1.67,50.7071




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 449730
  custom_metrics: {}
  date: 2021-11-19_02-51-40
  done: false
  episode_len_mean: 50.18181818181818
  episode_media: {}
  episode_reward_max: 13.720000000000004
  episode_reward_mean: 2.723636363636366
  episode_reward_min: -1.4600000000000006
  episodes_this_iter: 198
  episodes_total: 7664
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000003
          cur_lr: 5.000000000000001e-05
          entropy: 2.116172733364335
          entropy_coeff: 0.01
          kl: 0.016402713855117726
          policy_loss: -0.04650248005904337
          total_loss: 0.2000929121837142
          vf_explained_var: 0.7716162204742432
          vf_loss: 0.24284549859271995
    num_agent_steps_sampled: 449730
    num_agent_steps_trained: 449730
    num_steps_sampled: 449730
    num_steps_trained: 449730
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,105,30225.7,449730,2.72364,13.72,-1.46,50.1818


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 459726
  custom_metrics: {}
  date: 2021-11-19_03-00-32
  done: false
  episode_len_mean: 50.13930348258707
  episode_media: {}
  episode_reward_max: 9.590000000000005
  episode_reward_mean: 2.503034825870649
  episode_reward_min: -1.4500000000000006
  episodes_this_iter: 201
  episodes_total: 7865
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000003
          cur_lr: 5.000000000000001e-05
          entropy: 2.1198166593490355
          entropy_coeff: 0.01
          kl: 0.016972643895740904
          policy_loss: -0.05213744043745961
          total_loss: 0.20284682505247562
          vf_explained_var: 0.7612043619155884
          vf_loss: 0.25040522970476126
    num_agent_steps_sampled: 459726
    num_agent_steps_trained: 459726
    num_steps_sampled: 459726
    num_steps_trained: 459726
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,106,30757.6,459726,2.50303,9.59,-1.45,50.1393




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 469722
  custom_metrics: {}
  date: 2021-11-19_03-09-39
  done: false
  episode_len_mean: 48.74634146341464
  episode_media: {}
  episode_reward_max: 11.550000000000006
  episode_reward_mean: 2.4829756097560995
  episode_reward_min: -1.6600000000000008
  episodes_this_iter: 205
  episodes_total: 8070
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000003
          cur_lr: 5.000000000000001e-05
          entropy: 2.1119358144132008
          entropy_coeff: 0.01
          kl: 0.01620910618690701
          policy_loss: -0.05265604673534628
          total_loss: 0.19069059616616885
          vf_explained_var: 0.7539527416229248
          vf_loss: 0.2398484216976806
    num_agent_steps_sampled: 469722
    num_agent_steps_trained: 469722
    num_steps_sampled: 469722
    num_steps_trained: 469722
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,107,31304.6,469722,2.48298,11.55,-1.66,48.7463




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 479718
  custom_metrics: {}
  date: 2021-11-19_03-19-16
  done: false
  episode_len_mean: 47.91304347826087
  episode_media: {}
  episode_reward_max: 13.260000000000005
  episode_reward_mean: 3.0102898550724664
  episode_reward_min: -1.33
  episodes_this_iter: 207
  episodes_total: 8277
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000003
          cur_lr: 5.000000000000001e-05
          entropy: 2.087783662286628
          entropy_coeff: 0.01
          kl: 0.017225883733687605
          policy_loss: -0.05272283078966549
          total_loss: 0.22108700051295058
          vf_explained_var: 0.7719882726669312
          vf_loss: 0.2685258578980364
    num_agent_steps_sampled: 479718
    num_agent_steps_trained: 479718
    num_steps_sampled: 479718
    num_steps_trained: 479718
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,108,31881.4,479718,3.01029,13.26,-1.33,47.913


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 489714
  custom_metrics: {}
  date: 2021-11-19_03-28-11
  done: false
  episode_len_mean: 48.73300970873787
  episode_media: {}
  episode_reward_max: 11.780000000000003
  episode_reward_mean: 2.727961165048546
  episode_reward_min: -1.3900000000000006
  episodes_this_iter: 206
  episodes_total: 8483
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000003
          cur_lr: 5.000000000000001e-05
          entropy: 2.0901099593285095
          entropy_coeff: 0.01
          kl: 0.01695930430784111
          policy_loss: -0.05204828778394688
          total_loss: 0.2032390396360083
          vf_explained_var: 0.7970628142356873
          vf_loss: 0.2504314831706759
    num_agent_steps_sampled: 489714
    num_agent_steps_trained: 489714
    num_steps_sampled: 489714
    num_steps_trained: 489714
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,109,32416.7,489714,2.72796,11.78,-1.39,48.733




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 499710
  custom_metrics: {}
  date: 2021-11-19_03-37-49
  done: false
  episode_len_mean: 47.680952380952384
  episode_media: {}
  episode_reward_max: 11.580000000000005
  episode_reward_mean: 2.941476190476193
  episode_reward_min: -1.3100000000000003
  episodes_this_iter: 210
  episodes_total: 8693
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000003
          cur_lr: 5.000000000000001e-05
          entropy: 2.0815262493359517
          entropy_coeff: 0.01
          kl: 0.017575099968219446
          policy_loss: -0.05507742276031824
          total_loss: 0.19634034795446467
          vf_explained_var: 0.821672260761261
          vf_loss: 0.24554085034360906
    num_agent_steps_sampled: 499710
    num_agent_steps_trained: 499710
    num_steps_sampled: 499710
    num_steps_trained: 499710
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,110,32994.9,499710,2.94148,11.58,-1.31,47.681


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 509706
  custom_metrics: {}
  date: 2021-11-19_03-46-45
  done: false
  episode_len_mean: 48.34466019417476
  episode_media: {}
  episode_reward_max: 13.800000000000002
  episode_reward_mean: 2.8831067961165076
  episode_reward_min: -1.2800000000000002
  episodes_this_iter: 206
  episodes_total: 8899
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000003
          cur_lr: 5.000000000000001e-05
          entropy: 2.0957370328855323
          entropy_coeff: 0.01
          kl: 0.018159608449532152
          policy_loss: -0.0553634823724872
          total_loss: 0.20544099917995529
          vf_explained_var: 0.788053572177887
          vf_loss: 0.254181947733975
    num_agent_steps_sampled: 509706
    num_agent_steps_trained: 509706
    num_steps_sampled: 509706
    num_steps_trained: 509706
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,111,33531,509706,2.88311,13.8,-1.28,48.3447




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 519702
  custom_metrics: {}
  date: 2021-11-19_03-55-52
  done: false
  episode_len_mean: 48.56038647342995
  episode_media: {}
  episode_reward_max: 9.680000000000005
  episode_reward_mean: 2.945942028985509
  episode_reward_min: -1.3400000000000005
  episodes_this_iter: 207
  episodes_total: 9106
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000003
          cur_lr: 5.000000000000001e-05
          entropy: 2.1171973317981245
          entropy_coeff: 0.01
          kl: 0.01808102990645973
          policy_loss: -0.057249166836614844
          total_loss: 0.18758231650667312
          vf_explained_var: 0.8379780650138855
          vf_loss: 0.23854289205640136
    num_agent_steps_sampled: 519702
    num_agent_steps_trained: 519702
    num_steps_sampled: 519702
    num_steps_trained: 519702
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,112,34078.2,519702,2.94594,9.68,-1.34,48.5604




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 529698
  custom_metrics: {}
  date: 2021-11-19_04-05-02
  done: false
  episode_len_mean: 47.87980769230769
  episode_media: {}
  episode_reward_max: 11.490000000000007
  episode_reward_mean: 2.610096153846156
  episode_reward_min: -1.5600000000000005
  episodes_this_iter: 208
  episodes_total: 9314
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000003
          cur_lr: 5.000000000000001e-05
          entropy: 2.1195581986961596
          entropy_coeff: 0.01
          kl: 0.018254066477094034
          policy_loss: -0.04817917147751483
          total_loss: 0.1987484480491668
          vf_explained_var: 0.8285041451454163
          vf_loss: 0.24039983873753004
    num_agent_steps_sampled: 529698
    num_agent_steps_trained: 529698
    num_steps_sampled: 529698
    num_steps_trained: 529698
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,113,34628.2,529698,2.6101,11.49,-1.56,47.8798




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 539694
  custom_metrics: {}
  date: 2021-11-19_04-14-07
  done: false
  episode_len_mean: 49.02439024390244
  episode_media: {}
  episode_reward_max: 11.410000000000009
  episode_reward_mean: 3.0673170731707344
  episode_reward_min: -1.4000000000000004
  episodes_this_iter: 205
  episodes_total: 9519
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000003
          cur_lr: 5.000000000000001e-05
          entropy: 2.120601083834966
          entropy_coeff: 0.01
          kl: 0.01922168768645145
          policy_loss: -0.05505720004128299
          total_loss: 0.21044653127586696
          vf_explained_var: 0.801264226436615
          vf_loss: 0.25751680459950615
    num_agent_steps_sampled: 539694
    num_agent_steps_trained: 539694
    num_steps_sampled: 539694
    num_steps_trained: 539694
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,114,35173,539694,3.06732,11.41,-1.4,49.0244




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 549690
  custom_metrics: {}
  date: 2021-11-19_04-23-16
  done: false
  episode_len_mean: 48.33980582524272
  episode_media: {}
  episode_reward_max: 11.550000000000006
  episode_reward_mean: 3.1742233009708767
  episode_reward_min: -1.4200000000000004
  episodes_this_iter: 206
  episodes_total: 9725
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000003
          cur_lr: 5.000000000000001e-05
          entropy: 2.0998347274510256
          entropy_coeff: 0.01
          kl: 0.018042531024264806
          policy_loss: -0.055181094752199907
          total_loss: 0.2159024575893626
          vf_explained_var: 0.8023256659507751
          vf_loss: 0.26467980692596976
    num_agent_steps_sampled: 549690
    num_agent_steps_trained: 549690
    num_steps_sampled: 549690
    num_steps_trained: 549690


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,115,35721.9,549690,3.17422,11.55,-1.42,48.3398




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 559686
  custom_metrics: {}
  date: 2021-11-19_04-32-34
  done: false
  episode_len_mean: 47.507109004739334
  episode_media: {}
  episode_reward_max: 11.490000000000009
  episode_reward_mean: 2.863127962085311
  episode_reward_min: -1.4400000000000004
  episodes_this_iter: 211
  episodes_total: 9936
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000003
          cur_lr: 5.000000000000001e-05
          entropy: 2.1227961564399154
          entropy_coeff: 0.01
          kl: 0.019263017289830927
          policy_loss: -0.05861408990239722
          total_loss: 0.20953453205276754
          vf_explained_var: 0.8164215683937073
          vf_loss: 0.2601208768904688
    num_agent_steps_sampled: 559686
    num_agent_steps_trained: 559686
    num_steps_sampled: 559686
    num_steps_trained: 559686
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,116,36280,559686,2.86313,11.49,-1.44,47.5071




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 569682
  custom_metrics: {}
  date: 2021-11-19_04-41-44
  done: false
  episode_len_mean: 47.628571428571426
  episode_media: {}
  episode_reward_max: 11.130000000000008
  episode_reward_mean: 3.2582857142857167
  episode_reward_min: -1.3100000000000003
  episodes_this_iter: 210
  episodes_total: 10146
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000003
          cur_lr: 5.000000000000001e-05
          entropy: 2.11908060453503
          entropy_coeff: 0.01
          kl: 0.017131163103594665
          policy_loss: -0.05552092511615079
          total_loss: 0.16086216081880794
          vf_explained_var: 0.8228986263275146
          vf_loss: 0.21155593936971154
    num_agent_steps_sampled: 569682
    num_agent_steps_trained: 569682
    num_steps_sampled: 569682
    num_steps_trained: 569682


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,117,36829.2,569682,3.25829,11.13,-1.31,47.6286


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 579678
  custom_metrics: {}
  date: 2021-11-19_04-50-39
  done: false
  episode_len_mean: 48.09615384615385
  episode_media: {}
  episode_reward_max: 11.690000000000005
  episode_reward_mean: 3.4041346153846184
  episode_reward_min: -1.4400000000000006
  episodes_this_iter: 208
  episodes_total: 10354
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000003
          cur_lr: 5.000000000000001e-05
          entropy: 2.111341636870281
          entropy_coeff: 0.01
          kl: 0.02009826119395856
          policy_loss: -0.05735939017899197
          total_loss: 0.2067697427226489
          vf_explained_var: 0.8352379202842712
          vf_loss: 0.25471831579565496
    num_agent_steps_sampled: 579678
    num_agent_steps_trained: 579678
    num_steps_sampled: 579678
    num_steps_trained: 579678
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,118,37364.7,579678,3.40413,11.69,-1.44,48.0962


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 589674
  custom_metrics: {}
  date: 2021-11-19_04-59-33
  done: false
  episode_len_mean: 49.306930693069305
  episode_media: {}
  episode_reward_max: 11.330000000000004
  episode_reward_mean: 3.5000990099009934
  episode_reward_min: -1.2300000000000002
  episodes_this_iter: 202
  episodes_total: 10556
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1075038424936166
          entropy_coeff: 0.01
          kl: 0.015525046673275561
          policy_loss: -0.05487845069277039
          total_loss: 0.2162644888735153
          vf_explained_var: 0.8320192694664001
          vf_loss: 0.25684997980916746
    num_agent_steps_sampled: 589674
    num_agent_steps_trained: 589674
    num_steps_sampled: 589674
    num_steps_trained: 589674

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,119,37898.5,589674,3.5001,11.33,-1.23,49.3069




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 599670
  custom_metrics: {}
  date: 2021-11-19_05-08-56
  done: false
  episode_len_mean: 48.14492753623188
  episode_media: {}
  episode_reward_max: 11.600000000000005
  episode_reward_mean: 3.1126086956521757
  episode_reward_min: -1.4600000000000004
  episodes_this_iter: 207
  episodes_total: 10763
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.139286236997589
          entropy_coeff: 0.01
          kl: 0.015566751928601378
          policy_loss: -0.05278885679372946
          total_loss: 0.21787676737293254
          vf_explained_var: 0.8196830153465271
          vf_loss: 0.25659547909406055
    num_agent_steps_sampled: 599670
    num_agent_steps_trained: 599670
    num_steps_sampled: 599670
    num_steps_trained: 599670


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,120,38461.6,599670,3.11261,11.6,-1.46,48.1449




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 609666
  custom_metrics: {}
  date: 2021-11-19_05-18-20
  done: false
  episode_len_mean: 48.760975609756095
  episode_media: {}
  episode_reward_max: 11.32000000000001
  episode_reward_mean: 3.1372682926829296
  episode_reward_min: -1.2900000000000005
  episodes_this_iter: 205
  episodes_total: 10968
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.112611931849675
          entropy_coeff: 0.01
          kl: 0.014644460147807853
          policy_loss: -0.05320296689695491
          total_loss: 0.1998638353216515
          vf_explained_var: 0.8067812919616699
          vf_loss: 0.24083100936588753
    num_agent_steps_sampled: 609666
    num_agent_steps_trained: 609666
    num_steps_sampled: 609666
    num_steps_trained: 609666
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,121,39025.1,609666,3.13727,11.32,-1.29,48.761


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 619662
  custom_metrics: {}
  date: 2021-11-19_05-27-13
  done: false
  episode_len_mean: 49.711442786069654
  episode_media: {}
  episode_reward_max: 11.490000000000007
  episode_reward_mean: 2.950746268656719
  episode_reward_min: -1.1900000000000004
  episodes_this_iter: 201
  episodes_total: 11169
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.10796067836773
          entropy_coeff: 0.01
          kl: 0.014237280928002012
          policy_loss: -0.05839227648093328
          total_loss: 0.18689588744723826
          vf_explained_var: 0.8090219497680664
          vf_loss: 0.2339334638890283
    num_agent_steps_sampled: 619662
    num_agent_steps_trained: 619662
    num_steps_sampled: 619662
    num_steps_trained: 619662
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,122,39558.2,619662,2.95075,11.49,-1.19,49.7114


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 629658
  custom_metrics: {}
  date: 2021-11-19_05-36-07
  done: false
  episode_len_mean: 49.73762376237624
  episode_media: {}
  episode_reward_max: 17.380000000000003
  episode_reward_mean: 3.68173267326733
  episode_reward_min: -1.4000000000000004
  episodes_this_iter: 202
  episodes_total: 11371
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1297909966194966
          entropy_coeff: 0.01
          kl: 0.016002222497750913
          policy_loss: -0.0563921935318736
          total_loss: 0.21978776465219135
          vf_explained_var: 0.8423051834106445
          vf_loss: 0.26102280303776026
    num_agent_steps_sampled: 629658
    num_agent_steps_trained: 629658
    num_steps_sampled: 629658
    num_steps_trained: 629658
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,123,40092.1,629658,3.68173,17.38,-1.4,49.7376




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 639654
  custom_metrics: {}
  date: 2021-11-19_05-45-29
  done: false
  episode_len_mean: 49.39408866995074
  episode_media: {}
  episode_reward_max: 13.620000000000005
  episode_reward_mean: 3.361034482758624
  episode_reward_min: -1.4300000000000004
  episodes_this_iter: 203
  episodes_total: 11574
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.143301048958637
          entropy_coeff: 0.01
          kl: 0.01579189890250616
          policy_loss: -0.05145470540683328
          total_loss: 0.20336802236396428
          vf_explained_var: 0.8380917906761169
          vf_loss: 0.2402798161365047
    num_agent_steps_sampled: 639654
    num_agent_steps_trained: 639654
    num_steps_sampled: 639654
    num_steps_trained: 639654
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,124,40654.7,639654,3.36103,13.62,-1.43,49.3941




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 649650
  custom_metrics: {}
  date: 2021-11-19_05-54-35
  done: false
  episode_len_mean: 49.37128712871287
  episode_media: {}
  episode_reward_max: 12.940000000000008
  episode_reward_mean: 2.893267326732676
  episode_reward_min: -1.5400000000000007
  episodes_this_iter: 202
  episodes_total: 11776
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.125171444167095
          entropy_coeff: 0.01
          kl: 0.015265013990486426
          policy_loss: -0.055566982845931236
          total_loss: 0.2001255486263801
          vf_explained_var: 0.824100911617279
          vf_loss: 0.2421686348297734
    num_agent_steps_sampled: 649650
    num_agent_steps_trained: 649650
    num_steps_sampled: 649650
    num_steps_trained: 649650
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,125,41200.7,649650,2.89327,12.94,-1.54,49.3713


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 659646
  custom_metrics: {}
  date: 2021-11-19_06-03-26
  done: false
  episode_len_mean: 51.17948717948718
  episode_media: {}
  episode_reward_max: 13.410000000000007
  episode_reward_mean: 3.468461538461542
  episode_reward_min: -1.2700000000000002
  episodes_this_iter: 195
  episodes_total: 11971
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1428585075470337
          entropy_coeff: 0.01
          kl: 0.016325118817055024
          policy_loss: -0.05611806440160817
          total_loss: 0.22383764335459255
          vf_explained_var: 0.8242485523223877
          vf_loss: 0.2641936292017185
    num_agent_steps_sampled: 659646
    num_agent_steps_trained: 659646
    num_steps_sampled: 659646
    num_steps_trained: 659646
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,126,41731.7,659646,3.46846,13.41,-1.27,51.1795




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 669642
  custom_metrics: {}
  date: 2021-11-19_06-12-33
  done: false
  episode_len_mean: 49.97487437185929
  episode_media: {}
  episode_reward_max: 11.530000000000005
  episode_reward_mean: 3.431909547738697
  episode_reward_min: -1.2500000000000007
  episodes_this_iter: 199
  episodes_total: 12170
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1193587906628726
          entropy_coeff: 0.01
          kl: 0.01443824419824966
          policy_loss: -0.058918726538343846
          total_loss: 0.17630839901640832
          vf_explained_var: 0.8812476396560669
          vf_loss: 0.22352858755501906
    num_agent_steps_sampled: 669642
    num_agent_steps_trained: 669642
    num_steps_sampled: 669642
    num_steps_trained: 669642


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,127,42278,669642,3.43191,11.53,-1.25,49.9749




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 679638
  custom_metrics: {}
  date: 2021-11-19_06-22-05
  done: false
  episode_len_mean: 49.90594059405941
  episode_media: {}
  episode_reward_max: 11.620000000000005
  episode_reward_mean: 3.4059405940594094
  episode_reward_min: -1.3100000000000005
  episodes_this_iter: 202
  episodes_total: 12372
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1350066134967958
          entropy_coeff: 0.01
          kl: 0.016218655864552976
          policy_loss: -0.05894012622931447
          total_loss: 0.20941841300489497
          vf_explained_var: 0.8356194496154785
          vf_loss: 0.2527604793116121
    num_agent_steps_sampled: 679638
    num_agent_steps_trained: 679638
    num_steps_sampled: 679638
    num_steps_trained: 679638


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,128,42850.5,679638,3.40594,11.62,-1.31,49.9059


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 689634
  custom_metrics: {}
  date: 2021-11-19_06-30-59
  done: false
  episode_len_mean: 50.39393939393939
  episode_media: {}
  episode_reward_max: 13.590000000000005
  episode_reward_mean: 3.930606060606064
  episode_reward_min: -1.5400000000000005
  episodes_this_iter: 198
  episodes_total: 12570
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.127035714033617
          entropy_coeff: 0.01
          kl: 0.016673933156821426
          policy_loss: -0.05145224574796475
          total_loss: 0.23173919179316874
          vf_explained_var: 0.8434305787086487
          vf_loss: 0.2664764895210469
    num_agent_steps_sampled: 689634
    num_agent_steps_trained: 689634
    num_steps_sampled: 689634
    num_steps_trained: 689634
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,129,43384.7,689634,3.93061,13.59,-1.54,50.3939




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 699630
  custom_metrics: {}
  date: 2021-11-19_06-40-08
  done: false
  episode_len_mean: 49.925
  episode_media: {}
  episode_reward_max: 13.540000000000006
  episode_reward_mean: 3.365800000000003
  episode_reward_min: -1.2400000000000002
  episodes_this_iter: 200
  episodes_total: 12770
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1239713835907748
          entropy_coeff: 0.01
          kl: 0.015617860999181141
          policy_loss: -0.05779731687229265
          total_loss: 0.20296678474958052
          vf_explained_var: 0.8275567889213562
          vf_loss: 0.24642437381340737
    num_agent_steps_sampled: 699630
    num_agent_steps_trained: 699630
    num_steps_sampled: 699630
    num_steps_trained: 699630
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,130,43933.3,699630,3.3658,13.54,-1.24,49.925




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 709626
  custom_metrics: {}
  date: 2021-11-19_06-49-27
  done: false
  episode_len_mean: 49.28712871287129
  episode_media: {}
  episode_reward_max: 11.670000000000003
  episode_reward_mean: 3.1470297029702996
  episode_reward_min: -1.4900000000000004
  episodes_this_iter: 202
  episodes_total: 12972
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1292646120351004
          entropy_coeff: 0.01
          kl: 0.015834354317554727
          policy_loss: -0.05952202882844136
          total_loss: 0.17231093131039485
          vf_explained_var: 0.849496603012085
          vf_loss: 0.21705296627698414
    num_agent_steps_sampled: 709626
    num_agent_steps_trained: 709626
    num_steps_sampled: 709626
    num_steps_trained: 709626


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,131,44492.5,709626,3.14703,11.67,-1.49,49.2871




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 719622
  custom_metrics: {}
  date: 2021-11-19_06-58-34
  done: false
  episode_len_mean: 49.648514851485146
  episode_media: {}
  episode_reward_max: 13.190000000000007
  episode_reward_mean: 3.70183168316832
  episode_reward_min: -1.4700000000000006
  episodes_this_iter: 202
  episodes_total: 13174
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.128817945240013
          entropy_coeff: 0.01
          kl: 0.016372076902001008
          policy_loss: -0.05782246769535758
          total_loss: 0.2108581932471643
          vf_explained_var: 0.8473324179649353
          vf_loss: 0.25267120163689505
    num_agent_steps_sampled: 719622
    num_agent_steps_trained: 719622
    num_steps_sampled: 719622
    num_steps_trained: 719622
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,132,45039.2,719622,3.70183,13.19,-1.47,49.6485


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 729618
  custom_metrics: {}
  date: 2021-11-19_07-07-29
  done: false
  episode_len_mean: 49.433497536945815
  episode_media: {}
  episode_reward_max: 13.630000000000004
  episode_reward_mean: 3.4633497536945845
  episode_reward_min: -1.1600000000000004
  episodes_this_iter: 203
  episodes_total: 13377
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1331748870481926
          entropy_coeff: 0.01
          kl: 0.01679270546069292
          policy_loss: -0.05772865702320234
          total_loss: 0.21077791034936766
          vf_explained_var: 0.8448104858398438
          vf_loss: 0.25158243459391305
    num_agent_steps_sampled: 729618
    num_agent_steps_trained: 729618
    num_steps_sampled: 729618
    num_steps_trained: 729618

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,133,45574.6,729618,3.46335,13.63,-1.16,49.4335




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 739614
  custom_metrics: {}
  date: 2021-11-19_07-16-36
  done: false
  episode_len_mean: 49.84422110552764
  episode_media: {}
  episode_reward_max: 15.48000000000001
  episode_reward_mean: 3.323768844221109
  episode_reward_min: -1.3400000000000005
  episodes_this_iter: 199
  episodes_total: 13576
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1262266097059213
          entropy_coeff: 0.01
          kl: 0.015567095104923859
          policy_loss: -0.06432107434031722
          total_loss: 0.1724247663399004
          vf_explained_var: 0.8537901639938354
          vf_loss: 0.22254431692053037
    num_agent_steps_sampled: 739614
    num_agent_steps_trained: 739614
    num_steps_sampled: 739614
    num_steps_trained: 739614
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,134,46121,739614,3.32377,15.48,-1.34,49.8442




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 749610
  custom_metrics: {}
  date: 2021-11-19_07-25-54
  done: false
  episode_len_mean: 49.95522388059702
  episode_media: {}
  episode_reward_max: 13.370000000000006
  episode_reward_mean: 3.2663184079602017
  episode_reward_min: -1.4400000000000004
  episodes_this_iter: 201
  episodes_total: 13777
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1315381841487193
          entropy_coeff: 0.01
          kl: 0.016083166951083215
          policy_loss: -0.05759525027212538
          total_loss: 0.18688597221617204
          vf_explained_var: 0.8141146898269653
          vf_loss: 0.22915713892142517
    num_agent_steps_sampled: 749610
    num_agent_steps_trained: 749610
    num_steps_sampled: 749610
    num_steps_trained: 749610

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,135,46679.2,749610,3.26632,13.37,-1.44,49.9552


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 759606
  custom_metrics: {}
  date: 2021-11-19_07-34-47
  done: false
  episode_len_mean: 49.66169154228856
  episode_media: {}
  episode_reward_max: 11.490000000000006
  episode_reward_mean: 2.9845273631840823
  episode_reward_min: -1.4200000000000006
  episodes_this_iter: 201
  episodes_total: 13978
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1412210105413414
          entropy_coeff: 0.01
          kl: 0.016298206269386672
          policy_loss: -0.06078187739463193
          total_loss: 0.17379748536140502
          vf_explained_var: 0.8138346672058105
          vf_loss: 0.21886222104609282
    num_agent_steps_sampled: 759606
    num_agent_steps_trained: 759606
    num_steps_sampled: 759606
    num_steps_trained: 759606

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,136,47212.2,759606,2.98453,11.49,-1.42,49.6617


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 769602
  custom_metrics: {}
  date: 2021-11-19_07-43-43
  done: false
  episode_len_mean: 49.80099502487562
  episode_media: {}
  episode_reward_max: 11.340000000000005
  episode_reward_mean: 3.1176616915422914
  episode_reward_min: -1.3900000000000006
  episodes_this_iter: 201
  episodes_total: 14179
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1305242258860884
          entropy_coeff: 0.01
          kl: 0.015150381217339281
          policy_loss: -0.06322469932627749
          total_loss: 0.17008646997640472
          vf_explained_var: 0.8654667735099792
          vf_loss: 0.2201019485144461
    num_agent_steps_sampled: 769602
    num_agent_steps_trained: 769602
    num_steps_sampled: 769602
    num_steps_trained: 769602


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,137,47747.8,769602,3.11766,11.34,-1.39,49.801




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 779598
  custom_metrics: {}
  date: 2021-11-19_07-53-05
  done: false
  episode_len_mean: 48.74146341463415
  episode_media: {}
  episode_reward_max: 11.620000000000005
  episode_reward_mean: 3.2226341463414663
  episode_reward_min: -1.1800000000000004
  episodes_this_iter: 205
  episodes_total: 14384
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1132136538804294
          entropy_coeff: 0.01
          kl: 0.015971102669428777
          policy_loss: -0.0636934696062549
          total_loss: 0.17917627955462942
          vf_explained_var: 0.829633355140686
          vf_loss: 0.22761771594637997
    num_agent_steps_sampled: 779598
    num_agent_steps_trained: 779598
    num_steps_sampled: 779598
    num_steps_trained: 779598
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,138,48309.6,779598,3.22263,11.62,-1.18,48.7415




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 789594
  custom_metrics: {}
  date: 2021-11-19_08-02-13
  done: false
  episode_len_mean: 48.955882352941174
  episode_media: {}
  episode_reward_max: 10.98000000000001
  episode_reward_mean: 3.2977450980392184
  episode_reward_min: -1.3900000000000003
  episodes_this_iter: 204
  episodes_total: 14588
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1128563565422733
          entropy_coeff: 0.01
          kl: 0.015210490837575592
          policy_loss: -0.06331964086427204
          total_loss: 0.146330346952544
          vf_explained_var: 0.8816300630569458
          vf_loss: 0.19612714998278183
    num_agent_steps_sampled: 789594
    num_agent_steps_trained: 789594
    num_steps_sampled: 789594
    num_steps_trained: 789594
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,139,48858,789594,3.29775,10.98,-1.39,48.9559


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 799590
  custom_metrics: {}
  date: 2021-11-19_08-11-09
  done: false
  episode_len_mean: 48.78048780487805
  episode_media: {}
  episode_reward_max: 11.440000000000007
  episode_reward_mean: 3.3812195121951247
  episode_reward_min: -1.1300000000000001
  episodes_this_iter: 205
  episodes_total: 14793
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.103546820347568
          entropy_coeff: 0.01
          kl: 0.01647607270134413
          policy_loss: -0.06341833830063617
          total_loss: 0.17299083615608948
          vf_explained_var: 0.8469244241714478
          vf_loss: 0.21991008848632615
    num_agent_steps_sampled: 799590
    num_agent_steps_trained: 799590
    num_steps_sampled: 799590
    num_steps_trained: 799590
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,140,49393.9,799590,3.38122,11.44,-1.13,48.7805




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 809586
  custom_metrics: {}
  date: 2021-11-19_08-20-16
  done: false
  episode_len_mean: 49.310344827586206
  episode_media: {}
  episode_reward_max: 11.510000000000005
  episode_reward_mean: 3.033497536945816
  episode_reward_min: -1.4700000000000004
  episodes_this_iter: 203
  episodes_total: 14996
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1267736571380893
          entropy_coeff: 0.01
          kl: 0.015877419165489265
          policy_loss: -0.06209551139906094
          total_loss: 0.16290999862055275
          vf_explained_var: 0.8484129905700684
          vf_loss: 0.21010249939010325
    num_agent_steps_sampled: 809586
    num_agent_steps_trained: 809586
    num_steps_sampled: 809586
    num_steps_trained: 809586

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,141,49941.2,809586,3.0335,11.51,-1.47,49.3103




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 819582
  custom_metrics: {}
  date: 2021-11-19_08-29-38
  done: false
  episode_len_mean: 49.351485148514854
  episode_media: {}
  episode_reward_max: 11.570000000000006
  episode_reward_mean: 3.1948514851485177
  episode_reward_min: -1.5100000000000005
  episodes_this_iter: 202
  episodes_total: 15198
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.123465376852985
          entropy_coeff: 0.01
          kl: 0.015995705987007235
          policy_loss: -0.06113033084328485
          total_loss: 0.16640832045080584
          vf_explained_var: 0.848476231098175
          vf_loss: 0.21233308572756
    num_agent_steps_sampled: 819582
    num_agent_steps_trained: 819582
    num_steps_sampled: 819582
    num_steps_trained: 819582
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,142,50502.9,819582,3.19485,11.57,-1.51,49.3515




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 829578
  custom_metrics: {}
  date: 2021-11-19_08-38-53
  done: false
  episode_len_mean: 49.93
  episode_media: {}
  episode_reward_max: 11.640000000000006
  episode_reward_mean: 3.333000000000003
  episode_reward_min: -1.3000000000000003
  episodes_this_iter: 200
  episodes_total: 15398
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.144609675206334
          entropy_coeff: 0.01
          kl: 0.0157874637593995
          policy_loss: -0.062417399618454626
          total_loss: 0.15953123840420202
          vf_explained_var: 0.8479889035224915
          vf_loss: 0.20742891808433167
    num_agent_steps_sampled: 829578
    num_agent_steps_trained: 829578
    num_steps_sampled: 829578
    num_steps_trained: 829578
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,143,51057.3,829578,3.333,11.64,-1.3,49.93




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 839574
  custom_metrics: {}
  date: 2021-11-19_08-47-59
  done: false
  episode_len_mean: 49.1078431372549
  episode_media: {}
  episode_reward_max: 13.510000000000005
  episode_reward_mean: 3.499705882352944
  episode_reward_min: -1.3000000000000003
  episodes_this_iter: 204
  episodes_total: 15602
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1246853979476485
          entropy_coeff: 0.01
          kl: 0.01643859993557296
          policy_loss: -0.06046817026320398
          total_loss: 0.1834197214237065
          vf_explained_var: 0.8708444833755493
          vf_loss: 0.22768555894909226
    num_agent_steps_sampled: 839574
    num_agent_steps_trained: 839574
    num_steps_sampled: 839574
    num_steps_trained: 839574
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,144,51604,839574,3.49971,13.51,-1.3,49.1078




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 849570
  custom_metrics: {}
  date: 2021-11-19_08-57-21
  done: false
  episode_len_mean: 48.68292682926829
  episode_media: {}
  episode_reward_max: 11.260000000000007
  episode_reward_mean: 3.589463414634149
  episode_reward_min: -1.4100000000000004
  episodes_this_iter: 205
  episodes_total: 15807
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.119340766314043
          entropy_coeff: 0.01
          kl: 0.015991157089837865
          policy_loss: -0.06805145159538283
          total_loss: 0.15704752710483602
          vf_explained_var: 0.876018226146698
          vf_loss: 0.20986252984940915
    num_agent_steps_sampled: 849570
    num_agent_steps_trained: 849570
    num_steps_sampled: 849570
    num_steps_trained: 849570
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,145,52165.8,849570,3.58946,11.26,-1.41,48.6829


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 859566
  custom_metrics: {}
  date: 2021-11-19_09-06-15
  done: false
  episode_len_mean: 49.200980392156865
  episode_media: {}
  episode_reward_max: 13.240000000000009
  episode_reward_mean: 3.0539705882352974
  episode_reward_min: -1.2900000000000003
  episodes_this_iter: 204
  episodes_total: 16011
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1278792230000936
          entropy_coeff: 0.01
          kl: 0.01578812642464396
          policy_loss: -0.06692895748345905
          total_loss: 0.14315543002089562
          vf_explained_var: 0.8676282167434692
          vf_loss: 0.19539585350545116
    num_agent_steps_sampled: 859566
    num_agent_steps_trained: 859566
    num_steps_sampled: 859566
    num_steps_trained: 859566

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,146,52700.1,859566,3.05397,13.24,-1.29,49.201




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 869562
  custom_metrics: {}
  date: 2021-11-19_09-15-28
  done: false
  episode_len_mean: 48.33495145631068
  episode_media: {}
  episode_reward_max: 13.600000000000005
  episode_reward_mean: 3.451456310679615
  episode_reward_min: -1.4900000000000004
  episodes_this_iter: 206
  episodes_total: 16217
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1232444301666504
          entropy_coeff: 0.01
          kl: 0.017321846882309366
          policy_loss: -0.06272086164768245
          total_loss: 0.17419856616797788
          vf_explained_var: 0.8778234720230103
          vf_loss: 0.21869053778474232
    num_agent_steps_sampled: 869562
    num_agent_steps_trained: 869562
    num_steps_sampled: 869562
    num_steps_trained: 869562


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,147,53252.6,869562,3.45146,13.6,-1.49,48.335


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 879558
  custom_metrics: {}
  date: 2021-11-19_09-24-24
  done: false
  episode_len_mean: 48.44927536231884
  episode_media: {}
  episode_reward_max: 13.550000000000004
  episode_reward_mean: 3.59178743961353
  episode_reward_min: -1.3200000000000003
  episodes_this_iter: 207
  episodes_total: 16424
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.136419529512704
          entropy_coeff: 0.01
          kl: 0.016025544140467425
          policy_loss: -0.0664140828926573
          total_loss: 0.15068042941012358
          vf_explained_var: 0.8900564908981323
          vf_loss: 0.20195051370203554
    num_agent_steps_sampled: 879558
    num_agent_steps_trained: 879558
    num_steps_sampled: 879558
    num_steps_trained: 879558
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,148,53788.4,879558,3.59179,13.55,-1.32,48.4493




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 889554
  custom_metrics: {}
  date: 2021-11-19_09-34-11
  done: false
  episode_len_mean: 47.476190476190474
  episode_media: {}
  episode_reward_max: 15.580000000000007
  episode_reward_mean: 3.3212857142857164
  episode_reward_min: -1.3500000000000005
  episodes_this_iter: 210
  episodes_total: 16634
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1166436348095474
          entropy_coeff: 0.01
          kl: 0.016015458056112937
          policy_loss: -0.06627682389781661
          total_loss: 0.1438516439010005
          vf_explained_var: 0.8810619711875916
          vf_loss: 0.1948096873330319
    num_agent_steps_sampled: 889554
    num_agent_steps_trained: 889554
    num_steps_sampled: 889554
    num_steps_trained: 889554


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,149,54375.2,889554,3.32129,15.58,-1.35,47.4762




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 899550
  custom_metrics: {}
  date: 2021-11-19_09-43-19
  done: false
  episode_len_mean: 48.529126213592235
  episode_media: {}
  episode_reward_max: 13.840000000000002
  episode_reward_mean: 3.6184466019417503
  episode_reward_min: -1.2800000000000002
  episodes_this_iter: 206
  episodes_total: 16840
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1231495258798563
          entropy_coeff: 0.01
          kl: 0.017540852332782905
          policy_loss: -0.05865497549885443
          total_loss: 0.19814156595017762
          vf_explained_var: 0.8652583956718445
          vf_loss: 0.23806778041189575
    num_agent_steps_sampled: 899550
    num_agent_steps_trained: 899550
    num_steps_sampled: 899550
    num_steps_trained: 89955

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,150,54923.6,899550,3.61845,13.84,-1.28,48.5291




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 909546
  custom_metrics: {}
  date: 2021-11-19_09-52-29
  done: false
  episode_len_mean: 48.50970873786408
  episode_media: {}
  episode_reward_max: 13.600000000000003
  episode_reward_mean: 3.5984466019417507
  episode_reward_min: -1.1000000000000003
  episodes_this_iter: 206
  episodes_total: 17046
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.157362671166539
          entropy_coeff: 0.01
          kl: 0.016315461454010746
          policy_loss: -0.06990668158129654
          total_loss: 0.15869666122136886
          vf_explained_var: 0.8781751990318298
          vf_loss: 0.21300830760592945
    num_agent_steps_sampled: 909546
    num_agent_steps_trained: 909546
    num_steps_sampled: 909546
    num_steps_trained: 909546


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,151,55473.5,909546,3.59845,13.6,-1.1,48.5097




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 919542
  custom_metrics: {}
  date: 2021-11-19_10-01-40
  done: false
  episode_len_mean: 48.21153846153846
  episode_media: {}
  episode_reward_max: 13.460000000000006
  episode_reward_mean: 3.1963942307692337
  episode_reward_min: -1.3000000000000003
  episodes_this_iter: 208
  episodes_total: 17254
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1475244065843913
          entropy_coeff: 0.01
          kl: 0.016561193996250303
          policy_loss: -0.06243313054502619
          total_loss: 0.15197270330069904
          vf_explained_var: 0.887698769569397
          vf_loss: 0.19815260715217864
    num_agent_steps_sampled: 919542
    num_agent_steps_trained: 919542
    num_steps_sampled: 919542
    num_steps_trained: 919542


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,152,56024.1,919542,3.19639,13.46,-1.3,48.2115


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 929538
  custom_metrics: {}
  date: 2021-11-19_10-10-36
  done: false
  episode_len_mean: 48.17307692307692
  episode_media: {}
  episode_reward_max: 13.370000000000006
  episode_reward_mean: 3.428461538461541
  episode_reward_min: -1.3600000000000003
  episodes_this_iter: 208
  episodes_total: 17462
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1453645702107362
          entropy_coeff: 0.01
          kl: 0.01576015825113545
          policy_loss: -0.06857792618150353
          total_loss: 0.12469305874458597
          vf_explained_var: 0.8715499639511108
          vf_loss: 0.17882101770722303
    num_agent_steps_sampled: 929538
    num_agent_steps_trained: 929538
    num_steps_sampled: 929538
    num_steps_trained: 929538
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,153,56560.3,929538,3.42846,13.37,-1.36,48.1731




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 939534
  custom_metrics: {}
  date: 2021-11-19_10-19-46
  done: false
  episode_len_mean: 48.916256157635466
  episode_media: {}
  episode_reward_max: 11.600000000000005
  episode_reward_mean: 3.3499507389162586
  episode_reward_min: -1.3400000000000003
  episodes_this_iter: 203
  episodes_total: 17665
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1501598986516517
          entropy_coeff: 0.01
          kl: 0.016666319222684727
          policy_loss: -0.06595496247667816
          total_loss: 0.14128755583219152
          vf_explained_var: 0.8751918077468872
          vf_loss: 0.19077615778122742
    num_agent_steps_sampled: 939534
    num_agent_steps_trained: 939534
    num_steps_sampled: 939534
    num_steps_trained: 93953

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,154,57110.3,939534,3.34995,11.6,-1.34,48.9163


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 949530
  custom_metrics: {}
  date: 2021-11-19_10-28-42
  done: false
  episode_len_mean: 48.86764705882353
  episode_media: {}
  episode_reward_max: 15.550000000000006
  episode_reward_mean: 3.4950980392156894
  episode_reward_min: -1.4500000000000004
  episodes_this_iter: 204
  episodes_total: 17869
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.132606369951164
          entropy_coeff: 0.01
          kl: 0.015655875403192146
          policy_loss: -0.07040352707111573
          total_loss: 0.13686609038006983
          vf_explained_var: 0.8519843220710754
          vf_loss: 0.19292963931737206
    num_agent_steps_sampled: 949530
    num_agent_steps_trained: 949530
    num_steps_sampled: 949530
    num_steps_trained: 949530


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,155,57646.3,949530,3.4951,15.55,-1.45,48.8676




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 959526
  custom_metrics: {}
  date: 2021-11-19_10-38-14
  done: false
  episode_len_mean: 48.07655502392345
  episode_media: {}
  episode_reward_max: 17.630000000000003
  episode_reward_mean: 3.6743540669856496
  episode_reward_min: -1.3400000000000005
  episodes_this_iter: 209
  episodes_total: 18078
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1360426322762747
          entropy_coeff: 0.01
          kl: 0.017357747907971087
          policy_loss: -0.06627179448838272
          total_loss: 0.15480935184272654
          vf_explained_var: 0.8954901695251465
          vf_loss: 0.2028984518919933
    num_agent_steps_sampled: 959526
    num_agent_steps_trained: 959526
    num_steps_sampled: 959526
    num_steps_trained: 959526


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,156,58218.5,959526,3.67435,17.63,-1.34,48.0766




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 969522
  custom_metrics: {}
  date: 2021-11-19_10-47-22
  done: false
  episode_len_mean: 47.89473684210526
  episode_media: {}
  episode_reward_max: 13.530000000000005
  episode_reward_mean: 3.458229665071773
  episode_reward_min: -1.1900000000000002
  episodes_this_iter: 209
  episodes_total: 18287
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1197847829286354
          entropy_coeff: 0.01
          kl: 0.01717194715533798
          policy_loss: -0.06855631150300684
          total_loss: 0.1472048094790084
          vf_explained_var: 0.8923365473747253
          vf_loss: 0.19783912410391083
    num_agent_steps_sampled: 969522
    num_agent_steps_trained: 969522
    num_steps_sampled: 969522
    num_steps_trained: 969522
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,157,58766.8,969522,3.45823,13.53,-1.19,47.8947


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 979518
  custom_metrics: {}
  date: 2021-11-19_10-56-20
  done: false
  episode_len_mean: 48.23671497584541
  episode_media: {}
  episode_reward_max: 11.550000000000006
  episode_reward_mean: 3.4923671497584574
  episode_reward_min: -1.4200000000000004
  episodes_this_iter: 207
  episodes_total: 18494
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.128395951655974
          entropy_coeff: 0.01
          kl: 0.016339358441008627
          policy_loss: -0.06854272517119091
          total_loss: 0.13508285014940563
          vf_explained_var: 0.8752323985099792
          vf_loss: 0.18768643283763953
    num_agent_steps_sampled: 979518
    num_agent_steps_trained: 979518
    num_steps_sampled: 979518
    num_steps_trained: 979518


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,158,59304.2,979518,3.49237,11.55,-1.42,48.2367




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 989514
  custom_metrics: {}
  date: 2021-11-19_11-05-42
  done: false
  episode_len_mean: 48.794117647058826
  episode_media: {}
  episode_reward_max: 13.280000000000008
  episode_reward_mean: 3.592058823529415
  episode_reward_min: -1.1900000000000006
  episodes_this_iter: 204
  episodes_total: 18698
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1282213323087578
          entropy_coeff: 0.01
          kl: 0.017395527991051683
          policy_loss: -0.06174960850744958
          total_loss: 0.1584171665876845
          vf_explained_var: 0.8907971978187561
          vf_loss: 0.2018197996321183
    num_agent_steps_sampled: 989514
    num_agent_steps_trained: 989514
    num_steps_sampled: 989514
    num_steps_trained: 989514
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,159,59866.3,989514,3.59206,13.28,-1.19,48.7941




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 999510
  custom_metrics: {}
  date: 2021-11-19_11-14-56
  done: false
  episode_len_mean: 48.057416267942585
  episode_media: {}
  episode_reward_max: 15.620000000000005
  episode_reward_mean: 4.139808612440195
  episode_reward_min: -1.2200000000000002
  episodes_this_iter: 209
  episodes_total: 18907
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.113344606218568
          entropy_coeff: 0.01
          kl: 0.01673916845661738
          policy_loss: -0.06493804933185991
          total_loss: 0.16223384862208123
          vf_explained_var: 0.9161248803138733
          vf_loss: 0.21017142524026394
    num_agent_steps_sampled: 999510
    num_agent_steps_trained: 999510
    num_steps_sampled: 999510
    num_steps_trained: 999510
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,160,60420.3,999510,4.13981,15.62,-1.22,48.0574




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 1009506
  custom_metrics: {}
  date: 2021-11-19_11-24-08
  done: false
  episode_len_mean: 48.43689320388349
  episode_media: {}
  episode_reward_max: 11.390000000000006
  episode_reward_mean: 3.8219417475728186
  episode_reward_min: -1.2300000000000004
  episodes_this_iter: 206
  episodes_total: 19113
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1281997080787596
          entropy_coeff: 0.01
          kl: 0.015944421880221805
          policy_loss: -0.0678474310103433
          total_loss: 0.12740136742575553
          vf_explained_var: 0.8956625461578369
          vf_loss: 0.18020740828166407
    num_agent_steps_sampled: 1009506
    num_agent_steps_trained: 1009506
    num_steps_sampled: 1009506
    num_steps_trained: 100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,161,60972,1009506,3.82194,11.39,-1.23,48.4369




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 1019502
  custom_metrics: {}
  date: 2021-11-19_11-33-25
  done: false
  episode_len_mean: 47.88516746411483
  episode_media: {}
  episode_reward_max: 15.670000000000005
  episode_reward_mean: 3.5708612440191416
  episode_reward_min: -1.2200000000000004
  episodes_this_iter: 209
  episodes_total: 19322
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1449732786440947
          entropy_coeff: 0.01
          kl: 0.016727890863557193
          policy_loss: -0.06674124894526837
          total_loss: 0.14406452526081884
          vf_explained_var: 0.8775285482406616
          vf_loss: 0.1941472796596078
    num_agent_steps_sampled: 1019502
    num_agent_steps_trained: 1019502
    num_steps_sampled: 1019502
    num_steps_trained: 101

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,162,61528.9,1019502,3.57086,15.67,-1.22,47.8852




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 1029498
  custom_metrics: {}
  date: 2021-11-19_11-42-35
  done: false
  episode_len_mean: 48.84313725490196
  episode_media: {}
  episode_reward_max: 13.780000000000003
  episode_reward_mean: 3.6162254901960815
  episode_reward_min: -1.4800000000000004
  episodes_this_iter: 204
  episodes_total: 19526
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.138988344042177
          entropy_coeff: 0.01
          kl: 0.016206215221860453
          policy_loss: -0.06370623145314355
          total_loss: 0.14249190064771852
          vf_explained_var: 0.8975666165351868
          vf_loss: 0.19066823009293093
    num_agent_steps_sampled: 1029498
    num_agent_steps_trained: 1029498
    num_steps_sampled: 1029498
    num_steps_trained: 102

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,163,62079.6,1029498,3.61623,13.78,-1.48,48.8431


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 1039494
  custom_metrics: {}
  date: 2021-11-19_11-51-33
  done: false
  episode_len_mean: 48.07177033492823
  episode_media: {}
  episode_reward_max: 11.490000000000007
  episode_reward_mean: 3.384019138755984
  episode_reward_min: -1.2300000000000002
  episodes_this_iter: 209
  episodes_total: 19735
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1587950842208175
          entropy_coeff: 0.01
          kl: 0.015935457914282292
          policy_loss: -0.07574654261407122
          total_loss: 0.09900476567439084
          vf_explained_var: 0.9031842350959778
          vf_loss: 0.1600362924464331
    num_agent_steps_sampled: 1039494
    num_agent_steps_trained: 1039494
    num_steps_sampled: 1039494
    num_steps_trained: 1039

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,164,62617.1,1039494,3.38402,11.49,-1.23,48.0718




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 1049490
  custom_metrics: {}
  date: 2021-11-19_12-00-47
  done: false
  episode_len_mean: 48.1875
  episode_media: {}
  episode_reward_max: 13.770000000000003
  episode_reward_mean: 3.712836538461542
  episode_reward_min: -1.4300000000000004
  episodes_this_iter: 208
  episodes_total: 19943
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1183051803743984
          entropy_coeff: 0.01
          kl: 0.016220597604158157
          policy_loss: -0.06950805613149234
          total_loss: 0.12224796666295767
          vf_explained_var: 0.8987577557563782
          vf_loss: 0.17598652500158032
    num_agent_steps_sampled: 1049490
    num_agent_steps_trained: 1049490
    num_steps_sampled: 1049490
    num_steps_trained: 1049490
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,165,63171.5,1049490,3.71284,13.77,-1.43,48.1875




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 1059486
  custom_metrics: {}
  date: 2021-11-19_12-10-02
  done: false
  episode_len_mean: 47.67942583732057
  episode_media: {}
  episode_reward_max: 13.500000000000007
  episode_reward_mean: 3.999234449760769
  episode_reward_min: -1.5400000000000005
  episodes_this_iter: 209
  episodes_total: 20152
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.133204801853402
          entropy_coeff: 0.01
          kl: 0.017286612498633155
          policy_loss: -0.06551213575951707
          total_loss: 0.16019745657649107
          vf_explained_var: 0.8834856748580933
          vf_loss: 0.20766057524405585
    num_agent_steps_sampled: 1059486
    num_agent_steps_trained: 1059486
    num_steps_sampled: 1059486
    num_steps_trained: 1059

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,166,63726.3,1059486,3.99923,13.5,-1.54,47.6794




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 1069482
  custom_metrics: {}
  date: 2021-11-19_12-19-11
  done: false
  episode_len_mean: 48.49029126213592
  episode_media: {}
  episode_reward_max: 17.179999999999996
  episode_reward_mean: 3.481019417475731
  episode_reward_min: -1.2300000000000004
  episodes_this_iter: 206
  episodes_total: 20358
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1197916411252384
          entropy_coeff: 0.01
          kl: 0.016972739411056474
          policy_loss: -0.07000688841148091
          total_loss: 0.1323220828609514
          vf_explained_var: 0.8917809724807739
          vf_loss: 0.18486086477405844
    num_agent_steps_sampled: 1069482
    num_agent_steps_trained: 1069482
    num_steps_sampled: 1069482
    num_steps_trained: 1069

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,167,64274.9,1069482,3.48102,17.18,-1.23,48.4903


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 1079478
  custom_metrics: {}
  date: 2021-11-19_12-28-10
  done: false
  episode_len_mean: 47.91346153846154
  episode_media: {}
  episode_reward_max: 15.440000000000005
  episode_reward_mean: 3.6080288461538497
  episode_reward_min: -1.3900000000000003
  episodes_this_iter: 208
  episodes_total: 20566
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.121123639335594
          entropy_coeff: 0.01
          kl: 0.015851293541699625
          policy_loss: -0.06823653672722264
          total_loss: 0.12107882601560313
          vf_explained_var: 0.8972551226615906
          vf_loss: 0.1744153696667762
    num_agent_steps_sampled: 1079478
    num_agent_steps_trained: 1079478
    num_steps_sampled: 1079478
    num_steps_trained: 1079

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,168,64813.8,1079478,3.60803,15.44,-1.39,47.9135




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 1089474
  custom_metrics: {}
  date: 2021-11-19_12-37-20
  done: false
  episode_len_mean: 48.25480769230769
  episode_media: {}
  episode_reward_max: 13.560000000000008
  episode_reward_mean: 3.7129326923076955
  episode_reward_min: -1.5000000000000004
  episodes_this_iter: 208
  episodes_total: 20774
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1240071040798862
          entropy_coeff: 0.01
          kl: 0.01677227608568083
          policy_loss: -0.06532815386847425
          total_loss: 0.13477894753422368
          vf_explained_var: 0.9057478904724121
          vf_loss: 0.1831378297597903
    num_agent_steps_sampled: 1089474
    num_agent_steps_trained: 1089474
    num_steps_sampled: 1089474
    num_steps_trained: 1089

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,169,65363.6,1089474,3.71293,13.56,-1.5,48.2548




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 1099470
  custom_metrics: {}
  date: 2021-11-19_12-46-32
  done: false
  episode_len_mean: 47.32380952380952
  episode_media: {}
  episode_reward_max: 13.540000000000008
  episode_reward_mean: 3.6898571428571456
  episode_reward_min: -1.2700000000000002
  episodes_this_iter: 210
  episodes_total: 20984
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.133776147657609
          entropy_coeff: 0.01
          kl: 0.015875972329161294
          policy_loss: -0.06790296230915757
          total_loss: 0.10920639747916382
          vf_explained_var: 0.9025681018829346
          vf_loss: 0.16227967074125946
    num_agent_steps_sampled: 1099470
    num_agent_steps_trained: 1099470
    num_steps_sampled: 1099470
    num_steps_trained: 109

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,170,65915.7,1099470,3.68986,13.54,-1.27,47.3238


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 1109466
  custom_metrics: {}
  date: 2021-11-19_12-55-29
  done: false
  episode_len_mean: 47.492890995260666
  episode_media: {}
  episode_reward_max: 11.490000000000007
  episode_reward_mean: 3.7404739336492923
  episode_reward_min: -1.3700000000000003
  episodes_this_iter: 211
  episodes_total: 21195
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1162956005837543
          entropy_coeff: 0.01
          kl: 0.016066573418939637
          policy_loss: -0.06876504663522935
          total_loss: 0.12400575074967769
          vf_explained_var: 0.8988093137741089
          vf_loss: 0.17733208847710916
    num_agent_steps_sampled: 1109466
    num_agent_steps_trained: 1109466
    num_steps_sampled: 1109466
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,171,66453.2,1109466,3.74047,11.49,-1.37,47.4929


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 1119462
  custom_metrics: {}
  date: 2021-11-19_13-04-25
  done: false
  episode_len_mean: 48.3252427184466
  episode_media: {}
  episode_reward_max: 11.540000000000006
  episode_reward_mean: 3.4212621359223334
  episode_reward_min: -1.4000000000000004
  episodes_this_iter: 206
  episodes_total: 21401
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.137507532734469
          entropy_coeff: 0.01
          kl: 0.016299779207240073
          policy_loss: -0.07054797322094988
          total_loss: 0.11060197955472162
          vf_explained_var: 0.8933246731758118
          vf_loss: 0.165392092124458
    num_agent_steps_sampled: 1119462
    num_agent_steps_trained: 1119462
    num_steps_sampled: 1119462
    num_steps_trained: 111946

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,172,66988.9,1119462,3.42126,11.54,-1.4,48.3252




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 1129458
  custom_metrics: {}
  date: 2021-11-19_13-13-52
  done: false
  episode_len_mean: 47.45497630331754
  episode_media: {}
  episode_reward_max: 11.770000000000003
  episode_reward_mean: 3.789478672985785
  episode_reward_min: -1.2000000000000002
  episodes_this_iter: 211
  episodes_total: 21612
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.132328580062552
          entropy_coeff: 0.01
          kl: 0.0165581352981641
          policy_loss: -0.06982717696759005
          total_loss: 0.1359707282090758
          vf_explained_var: 0.9085447192192078
          vf_loss: 0.18939968839775861
    num_agent_steps_sampled: 1129458
    num_agent_steps_trained: 1129458
    num_steps_sampled: 1129458
    num_steps_trained: 1129458

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,173,67555.7,1129458,3.78948,11.77,-1.2,47.455


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 1139454
  custom_metrics: {}
  date: 2021-11-19_13-22-50
  done: false
  episode_len_mean: 47.8
  episode_media: {}
  episode_reward_max: 13.410000000000005
  episode_reward_mean: 3.5949523809523845
  episode_reward_min: -1.4300000000000004
  episodes_this_iter: 210
  episodes_total: 21822
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.111138765112942
          entropy_coeff: 0.01
          kl: 0.016598222132283548
          policy_loss: -0.06948087827220946
          total_loss: 0.12604162981316408
          vf_explained_var: 0.886981725692749
          vf_loss: 0.17882106850441754
    num_agent_steps_sampled: 1139454
    num_agent_steps_trained: 1139454
    num_steps_sampled: 1139454
    num_steps_trained: 1139454
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,174,68093.4,1139454,3.59495,13.41,-1.43,47.8


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 1149450
  custom_metrics: {}
  date: 2021-11-19_13-31-48
  done: false
  episode_len_mean: 48.23671497584541
  episode_media: {}
  episode_reward_max: 11.550000000000004
  episode_reward_mean: 4.132318840579713
  episode_reward_min: -1.3700000000000003
  episodes_this_iter: 207
  episodes_total: 22029
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1221282804826176
          entropy_coeff: 0.01
          kl: 0.017937210030978465
          policy_loss: -0.06714520598020408
          total_loss: 0.155172944947108
          vf_explained_var: 0.8896703720092773
          vf_loss: 0.20267622721546055
    num_agent_steps_sampled: 1149450
    num_agent_steps_trained: 1149450
    num_steps_sampled: 1149450
    num_steps_trained: 11494

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,175,68632.1,1149450,4.13232,11.55,-1.37,48.2367




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 1159446
  custom_metrics: {}
  date: 2021-11-19_13-41-00
  done: false
  episode_len_mean: 47.54976303317535
  episode_media: {}
  episode_reward_max: 13.420000000000007
  episode_reward_mean: 3.5796682464455
  episode_reward_min: -1.1500000000000004
  episodes_this_iter: 211
  episodes_total: 22240
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1228926635889644
          entropy_coeff: 0.01
          kl: 0.016314397692651778
          policy_loss: -0.07010958979132617
          total_loss: 0.12541152592936877
          vf_explained_var: 0.902228832244873
          vf_loss: 0.17958380332007165
    num_agent_steps_sampled: 1159446
    num_agent_steps_trained: 1159446
    num_steps_sampled: 1159446
    num_steps_trained: 115944

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,176,69184.2,1159446,3.57967,13.42,-1.15,47.5498




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 1169442
  custom_metrics: {}
  date: 2021-11-19_13-50-26
  done: false
  episode_len_mean: 46.990566037735846
  episode_media: {}
  episode_reward_max: 11.640000000000004
  episode_reward_mean: 3.648962264150947
  episode_reward_min: -1.4200000000000004
  episodes_this_iter: 212
  episodes_total: 22452
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.132500566440414
          entropy_coeff: 0.01
          kl: 0.016913495806971702
          policy_loss: -0.0722811968890699
          total_loss: 0.1316809207821812
          vf_explained_var: 0.8908615112304688
          vf_loss: 0.1867560646272298
    num_agent_steps_sampled: 1169442
    num_agent_steps_trained: 1169442
    num_steps_sampled: 1169442
    num_steps_trained: 116944

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,177,69749.4,1169442,3.64896,11.64,-1.42,46.9906




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 1179438
  custom_metrics: {}
  date: 2021-11-19_13-59-48
  done: false
  episode_len_mean: 46.967136150234744
  episode_media: {}
  episode_reward_max: 11.580000000000005
  episode_reward_mean: 3.5183568075117395
  episode_reward_min: -1.2900000000000003
  episodes_this_iter: 213
  episodes_total: 22665
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1398875660685652
          entropy_coeff: 0.01
          kl: 0.017159654855527447
          policy_loss: -0.07095698384483004
          total_loss: 0.12274051167302703
          vf_explained_var: 0.9070926308631897
          vf_loss: 0.1760045315519566
    num_agent_steps_sampled: 1179438
    num_agent_steps_trained: 1179438
    num_steps_sampled: 1179438
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,178,70311.4,1179438,3.51836,11.58,-1.29,46.9671




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 1189434
  custom_metrics: {}
  date: 2021-11-19_14-09-00
  done: false
  episode_len_mean: 47.29857819905213
  episode_media: {}
  episode_reward_max: 13.560000000000006
  episode_reward_mean: 4.050331753554506
  episode_reward_min: -1.1400000000000001
  episodes_this_iter: 211
  episodes_total: 22876
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.146425505479177
          entropy_coeff: 0.01
          kl: 0.017828416768532338
          policy_loss: -0.0692170857792891
          total_loss: 0.13210282262150713
          vf_explained_var: 0.8772870898246765
          vf_loss: 0.1821688006169174
    num_agent_steps_sampled: 1189434
    num_agent_steps_trained: 1189434
    num_steps_sampled: 1189434
    num_steps_trained: 118943

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,179,70864.1,1189434,4.05033,13.56,-1.14,47.2986




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 1199430
  custom_metrics: {}
  date: 2021-11-19_14-18-12
  done: false
  episode_len_mean: 47.880382775119614
  episode_media: {}
  episode_reward_max: 13.310000000000008
  episode_reward_mean: 3.6775119617224914
  episode_reward_min: -1.6000000000000005
  episodes_this_iter: 209
  episodes_total: 23085
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1369849511657852
          entropy_coeff: 0.01
          kl: 0.015870235194004764
          policy_loss: -0.07014658582594571
          total_loss: 0.108819935949408
          vf_explained_var: 0.8985688090324402
          vf_loss: 0.16418199085005855
    num_agent_steps_sampled: 1199430
    num_agent_steps_trained: 1199430
    num_steps_sampled: 1199430
    num_steps_trained: 119

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,180,71415.6,1199430,3.67751,13.31,-1.6,47.8804




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 1209426
  custom_metrics: {}
  date: 2021-11-19_14-27-22
  done: false
  episode_len_mean: 47.75598086124402
  episode_media: {}
  episode_reward_max: 11.510000000000005
  episode_reward_mean: 3.5612918660287116
  episode_reward_min: -1.4700000000000004
  episodes_this_iter: 209
  episodes_total: 23294
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1318865357153864
          entropy_coeff: 0.01
          kl: 0.016239719263516764
          policy_loss: -0.067629702200698
          total_loss: 0.12038286293012876
          vf_explained_var: 0.8861465454101562
          vf_loss: 0.1723353188393271
    num_agent_steps_sampled: 1209426
    num_agent_steps_trained: 1209426
    num_steps_sampled: 1209426
    num_steps_trained: 12094

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,181,71965.6,1209426,3.56129,11.51,-1.47,47.756


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 1219422
  custom_metrics: {}
  date: 2021-11-19_14-36-18
  done: false
  episode_len_mean: 48.68780487804878
  episode_media: {}
  episode_reward_max: 13.380000000000006
  episode_reward_mean: 3.7815121951219544
  episode_reward_min: -1.2900000000000003
  episodes_this_iter: 205
  episodes_total: 23499
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.161163891630479
          entropy_coeff: 0.01
          kl: 0.018155697971804215
          policy_loss: -0.06917311402938205
          total_loss: 0.14966117294815665
          vf_explained_var: 0.8865032196044922
          vf_loss: 0.19908497493609367
    num_agent_steps_sampled: 1219422
    num_agent_steps_trained: 1219422
    num_steps_sampled: 1219422
    num_steps_trained: 121

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,182,72501.8,1219422,3.78151,13.38,-1.29,48.6878




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 1229418
  custom_metrics: {}
  date: 2021-11-19_14-45-50
  done: false
  episode_len_mean: 47.5260663507109
  episode_media: {}
  episode_reward_max: 11.660000000000004
  episode_reward_mean: 3.6336966824644583
  episode_reward_min: -1.3500000000000003
  episodes_this_iter: 211
  episodes_total: 23710
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.155574660487922
          entropy_coeff: 0.01
          kl: 0.01677190834448781
          policy_loss: -0.07273966067205695
          total_loss: 0.12154350413149484
          vf_explained_var: 0.9031069874763489
          vf_loss: 0.17763040609962594
    num_agent_steps_sampled: 1229418
    num_agent_steps_trained: 1229418
    num_steps_sampled: 1229418
    num_steps_trained: 12294

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,183,73073.8,1229418,3.6337,11.66,-1.35,47.5261


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 1239414
  custom_metrics: {}
  date: 2021-11-19_14-54-46
  done: false
  episode_len_mean: 47.85096153846154
  episode_media: {}
  episode_reward_max: 13.540000000000006
  episode_reward_mean: 3.8589423076923115
  episode_reward_min: -1.2600000000000002
  episodes_this_iter: 208
  episodes_total: 23918
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.165029764247228
          entropy_coeff: 0.01
          kl: 0.017940197846153257
          policy_loss: -0.062301903444849774
          total_loss: 0.15741322441600172
          vf_explained_var: 0.8916301131248474
          vf_loss: 0.20049541056133716
    num_agent_steps_sampled: 1239414
    num_agent_steps_trained: 1239414
    num_steps_sampled: 1239414
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,184,73609.5,1239414,3.85894,13.54,-1.26,47.851


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 1249410
  custom_metrics: {}
  date: 2021-11-19_15-03-45
  done: false
  episode_len_mean: 47.714285714285715
  episode_media: {}
  episode_reward_max: 13.250000000000005
  episode_reward_mean: 3.389238095238098
  episode_reward_min: -1.3100000000000003
  episodes_this_iter: 210
  episodes_total: 24128
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.180219031050502
          entropy_coeff: 0.01
          kl: 0.01560458169163727
          policy_loss: -0.06964567655912975
          total_loss: 0.11155740895623582
          vf_explained_var: 0.8996003270149231
          vf_loss: 0.1674560859696528
    num_agent_steps_sampled: 1249410
    num_agent_steps_trained: 1249410
    num_steps_sampled: 1249410
    num_steps_trained: 12494

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,185,74148.8,1249410,3.38924,13.25,-1.31,47.7143




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 1259406
  custom_metrics: {}
  date: 2021-11-19_15-12-55
  done: false
  episode_len_mean: 47.8421052631579
  episode_media: {}
  episode_reward_max: 13.580000000000005
  episode_reward_mean: 3.9287081339712953
  episode_reward_min: -1.1900000000000002
  episodes_this_iter: 209
  episodes_total: 24337
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1816094185214445
          entropy_coeff: 0.01
          kl: 0.01706629242890284
          policy_loss: -0.06791903973069888
          total_loss: 0.13097198607951338
          vf_explained_var: 0.8936988115310669
          vf_loss: 0.18182797246597648
    num_agent_steps_sampled: 1259406
    num_agent_steps_trained: 1259406
    num_steps_sampled: 1259406
    num_steps_trained: 1259

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,186,74698.3,1259406,3.92871,13.58,-1.19,47.8421


Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 1269402
  custom_metrics: {}
  date: 2021-11-19_15-21-52
  done: false
  episode_len_mean: 48.08173076923077
  episode_media: {}
  episode_reward_max: 13.470000000000006
  episode_reward_mean: 3.2993269230769253
  episode_reward_min: -1.1800000000000002
  episodes_this_iter: 208
  episodes_total: 24545
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.201094838797328
          entropy_coeff: 0.01
          kl: 0.016557770503100706
          policy_loss: -0.06418156716489352
          total_loss: 0.13493029851385827
          vf_explained_var: 0.8977556824684143
          vf_loss: 0.1834021413009651
    num_agent_steps_sampled: 1269402
    num_agent_steps_trained: 1269402
    num_steps_sampled: 1269402
    num_steps_trained: 1269

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,187,75235.1,1269402,3.29933,13.47,-1.18,48.0817




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 1279398
  custom_metrics: {}
  date: 2021-11-19_15-31-13
  done: false
  episode_len_mean: 47.64114832535885
  episode_media: {}
  episode_reward_max: 13.740000000000004
  episode_reward_mean: 3.703444976076558
  episode_reward_min: -1.4300000000000006
  episodes_this_iter: 209
  episodes_total: 24754
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.187438356660935
          entropy_coeff: 0.01
          kl: 0.01629508388726498
          policy_loss: -0.06586064897738217
          total_loss: 0.12612879602182944
          vf_explained_var: 0.8742015957832336
          vf_loss: 0.17674158847419522
    num_agent_steps_sampled: 1279398
    num_agent_steps_trained: 1279398
    num_steps_sampled: 1279398
    num_steps_trained: 12793

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,188,75796.7,1279398,3.70344,13.74,-1.43,47.6411




Result for PPO_my_env_ab24a_00000:
  agent_timesteps_total: 1289394
  custom_metrics: {}
  date: 2021-11-19_15-40-37
  done: false
  episode_len_mean: 47.89047619047619
  episode_media: {}
  episode_reward_max: 13.600000000000003
  episode_reward_mean: 3.9950000000000037
  episode_reward_min: -1.1700000000000002
  episodes_this_iter: 210
  episodes_total: 24964
  experiment_id: 61c3754b92a44256b5093c1912448244
  hostname: linar-Z390-GAMING-X
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.1732201084075684
          entropy_coeff: 0.01
          kl: 0.016140844397435496
          policy_loss: -0.0701603643100726
          total_loss: 0.11665490858136603
          vf_explained_var: 0.9242907166481018
          vf_loss: 0.1717766118191003
    num_agent_steps_sampled: 1289394
    num_agent_steps_trained: 1289394
    num_steps_sampled: 1289394
    num_steps_trained: 1289

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_ab24a_00000,RUNNING,192.168.3.5:101983,189,76360.9,1289394,3.995,13.6,-1.17,47.8905




In [None]:
!l