In [6]:
from ray import tune
from ray.rllib.agents.ppo import PPOTrainer
from ray.rllib.agents.dqn import DQNTrainer
from ray.rllib.agents.a3c import A3CTrainer, A2CTrainer
from ray.tune.registry import register_env


from collections import defaultdict
import gym
from gym import spaces

import numpy as np
from math import pi

To monitor training progress, run TensorBoard against the results directory: `tensorboard --logdir results`

In [7]:
class IterationOrRewardStopper(tune.Stopper):
    """Stop a trial after an iteration budget or once its recent reward is high enough.

    A trial is stopped as soon as either condition holds:

    * the stopper has been called ``max_iterations`` times for that trial, or
    * the sum of the trial's most recent ``reward_window`` values of
      ``result["episode_reward_mean"]`` is at least
      ``target_reward * reward_window``.

    The threshold is always scaled by the full ``reward_window``, so while
    fewer than ``reward_window`` results have been recorded the missing
    entries count as zero toward the sum.

    All state is tracked per ``trial_id``; the stopper never requests that
    the whole experiment be stopped.

    Args:
        max_iterations: Number of reported results after which a trial is
            stopped regardless of reward.
        target_reward: Mean reward the window has to reach.
        reward_window: Number of most recent results included in the sum.
    """

    def __init__(self, max_iterations=10000, target_reward=475, reward_window=100):
        self._max_iterations = max_iterations
        self._target_reward = target_reward
        self._reward_window = reward_window

        # Built-in factories instead of lambdas: same defaults (0 and []),
        # and the instance stays picklable with the standard pickle module.
        self._iter = defaultdict(int)
        self._rewards = defaultdict(list)

    def __call__(self, trial_id, result):
        """Record one result for ``trial_id`` and decide whether to stop it.

        Args:
            trial_id: Identifier of the trial that produced ``result``.
            result: Result dict; must contain ``"episode_reward_mean"``.

        Returns:
            ``True`` if the trial should stop, ``False`` otherwise.

        Raises:
            KeyError: If ``result`` has no ``"episode_reward_mean"`` entry.
        """
        self._iter[trial_id] += 1

        # The iteration budget is checked first; the result that exhausts the
        # budget is not added to the reward window.
        if self._iter[trial_id] >= self._max_iterations:
            return True

        recent_rewards = self._rewards[trial_id]
        recent_rewards.append(result["episode_reward_mean"])
        if len(recent_rewards) > self._reward_window:
            # Keep only the newest ``reward_window`` entries.
            del recent_rewards[0]

        # Always return a real bool (never an implicit None) so callers get
        # the documented True/False answer on every path.
        return bool(
            sum(recent_rewards) >= self._target_reward * self._reward_window
        )

    def stop_all(self):
        """Never stop the whole experiment; trials are stopped individually."""
        return False

In [8]:
# Collects the tune.Experiment specs that are later submitted together.
experiments = []

In [9]:
# Queue two PPO experiments on CartPole-v1 that differ only in their name and
# in whether one GPU is requested. Each experiment gets its own stopper
# instance and is sampled five times.
experiments.extend(
    tune.Experiment(
        name=experiment_name,
        run=PPOTrainer,
        local_dir="./results",
        stop=IterationOrRewardStopper(),
        config={"env": "CartPole-v1", **extra_config},
        num_samples=5,
    )
    for experiment_name, extra_config in (
        ("ppo_cartpole", {}),
        ("ppo_cartpole_gpu", {"num_gpus": 1}),
    )
)

In [10]:
# Hand every queued experiment to Tune in a single call. Per the log output
# below, Tune then runs them as concurrent experiments that share one
# SearchAlgorithm.
tune.run_experiments(experiments)

2022-07-30 15:06:07,237	INFO experiment.py:452 -- Running with multiple concurrent experiments. All experiments will be using the same SearchAlgorithm.
[2m[33m(raylet)[0m bash: /home/ibraheem/miniconda3/envs/tf/lib/libtinfo.so.6: no version information available (required by bash)
[2m[36m(PPOTrainer pid=44132)[0m 2022-07-30 15:06:10,818	INFO trainer.py:2332 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
[2m[36m(PPOTrainer pid=44132)[0m 2022-07-30 15:06:10,818	INFO ppo.py:414 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(PPOTrainer pid=44132)[0m 2022-07-30 15:06:10,818	INFO trainer.py:903 -- Current log_level is WARN. For more information, set 'log_leve

Trial name,status,loc
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132
PPOTrainer_CartPole-v1_c1a07_00000,PENDING,
PPOTrainer_CartPole-v1_c1a07_00001,PENDING,
PPOTrainer_CartPole-v1_c1a07_00002,PENDING,
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,


[2m[33m(raylet)[0m bash: /home/ibraheem/miniconda3/envs/tf/lib/libtinfo.so.6: no version information available (required by bash)
[2m[36m(PPOTrainer pid=44312)[0m 2022-07-30 15:06:19,572	INFO trainer.py:2332 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
[2m[36m(PPOTrainer pid=44312)[0m 2022-07-30 15:06:19,573	INFO ppo.py:414 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(PPOTrainer pid=44312)[0m 2022-07-30 15:06:19,573	INFO trainer.py:903 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[33m(raylet)[0m bash: /home/ibraheem/miniconda3/envs/tf/lib/libtinfo.so.6: no version informatio

Trial name,status,loc
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132
PPOTrainer_CartPole-v1_c1a07_00001,PENDING,
PPOTrainer_CartPole-v1_c1a07_00002,PENDING,
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,


[2m[33m(raylet)[0m bash: /home/ibraheem/miniconda3/envs/tf/lib/libtinfo.so.6: no version information available (required by bash)
[2m[36m(PPOTrainer pid=44450)[0m 2022-07-30 15:06:27,361	INFO trainer.py:2332 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
[2m[36m(PPOTrainer pid=44450)[0m 2022-07-30 15:06:27,362	INFO ppo.py:414 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(PPOTrainer pid=44450)[0m 2022-07-30 15:06:27,362	INFO trainer.py:903 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[33m(raylet)[0m bash: /home/ibraheem/miniconda3/envs/tf/lib/libtinfo.so.6: no version informatio

Trial name,status,loc
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132
PPOTrainer_CartPole-v1_c1a07_00002,PENDING,
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,


[2m[33m(raylet)[0m bash: /home/ibraheem/miniconda3/envs/tf/lib/libtinfo.so.6: no version information available (required by bash)
[2m[36m(PPOTrainer pid=44601)[0m 2022-07-30 15:06:35,229	INFO trainer.py:2332 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
[2m[36m(PPOTrainer pid=44601)[0m 2022-07-30 15:06:35,229	INFO ppo.py:414 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(PPOTrainer pid=44601)[0m 2022-07-30 15:06:35,229	INFO trainer.py:903 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[33m(raylet)[0m bash: /home/ibraheem/miniconda3/envs/tf/lib/libtinfo.so.6: no version informatio

Trial name,status,loc
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,


Result for PPOTrainer_CartPole-v1_c1a07_00001:
  agent_timesteps_total: 4000
  counters:
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_env_steps_sampled: 4000
    num_env_steps_trained: 4000
  custom_metrics: {}
  date: 2022-07-30_15-06-34
  done: false
  episode_len_mean: 21.988950276243095
  episode_media: {}
  episode_reward_max: 95.0
  episode_reward_mean: 21.988950276243095
  episode_reward_min: 10.0
  episodes_this_iter: 181
  episodes_total: 181
  experiment_id: 56b0ac4ea0c44796871555a64d0eb49a
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.6657003164291382
          entropy_coeff: 0.0
          kl: 0.027774671092629433
          model: {}
          policy_loss: -0.04118102788925171
          total_loss: 8.780881881713867
          vf_explained_var: 0.007304877508431673
          



Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,1.0,2.71995,4000.0,21.989,95.0,10.0,21.989
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00006:
  agent_timesteps_total: 4000
  counters:
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_env_steps_sampled: 4000
    num_env_steps_trained: 4000
  custom_metrics: {}
  date: 2022-07-30_15-06-19
  done: false
  episode_len_mean: 21.619565217391305
  episode_media: {}
  episode_reward_max: 72.0
  episode_reward_mean: 21.619565217391305
  episode_reward_min: 9.0
  episodes_this_iter: 184
  episodes_total: 184
  experiment_id: 7d72bfe4c7af4135afdb29472b95b747
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.6667596101760864
          entropy_coeff: 0.0
          kl: 0.027466168627142906
          model: {}
          policy_loss: -0.043895915150642395
          total_loss: 8.724431037902832
          vf_explained_var: -0.0008477801457047462
          vf_loss: 8.76283454895019



Result for PPOTrainer_CartPole-v1_c1a07_00002:
  agent_timesteps_total: 4000
  counters:
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_env_steps_sampled: 4000
    num_env_steps_trained: 4000
  custom_metrics: {}
  date: 2022-07-30_15-06-43
  done: false
  episode_len_mean: 21.21276595744681
  episode_media: {}
  episode_reward_max: 89.0
  episode_reward_mean: 21.21276595744681
  episode_reward_min: 8.0
  episodes_this_iter: 188
  episodes_total: 188
  experiment_id: efef1a2bc4c54296b5efbb75abba9fd0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.6663875579833984
          entropy_coeff: 0.0
          kl: 0.02798312157392502
          model: {}
          policy_loss: -0.04237547144293785
          total_loss: 8.747772216796875
          vf_explained_var: 0.00045081903226673603
          vf

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,2.0,6.799,8000.0,40.77,179.0,10.0,40.77
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,2.0,6.6144,8000.0,39.4059,145.0,9.0,39.4059
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,2.0,7.52942,8000.0,41.37,253.0,10.0,41.37
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,2.0,8.17412,8000.0,42.69,133.0,9.0,42.69
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00001:
  agent_timesteps_total: 12000
  counters:
    num_agent_steps_sampled: 12000
    num_agent_steps_trained: 12000
    num_env_steps_sampled: 12000
    num_env_steps_trained: 12000
  custom_metrics: {}
  date: 2022-07-30_15-06-47
  done: false
  episode_len_mean: 64.87
  episode_media: {}
  episode_reward_max: 182.0
  episode_reward_mean: 64.87
  episode_reward_min: 9.0
  episodes_this_iter: 41
  episodes_total: 323
  experiment_id: 56b0ac4ea0c44796871555a64d0eb49a
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.30000001192092896
          cur_lr: 4.999999873689376e-05
          entropy: 0.5823067426681519
          entropy_coeff: 0.0
          kl: 0.011854147538542747
          model: {}
          policy_loss: -0.02405570261180401
          total_loss: 9.0316743850708
          vf_explained_var: 0.122768834233284
          vf_loss: 9.052173614501953


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,4.0,14.4689,16000.0,90.81,311.0,11.0,90.81
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,4.0,14.6854,16000.0,95.18,464.0,9.0,95.18
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,4.0,14.7504,16000.0,97.13,445.0,10.0,97.13
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,3.0,13.0428,12000.0,68.4,276.0,11.0,68.4
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00006:
  agent_timesteps_total: 16000
  counters:
    num_agent_steps_sampled: 16000
    num_agent_steps_trained: 16000
    num_env_steps_sampled: 16000
    num_env_steps_trained: 16000
  custom_metrics: {}
  date: 2022-07-30_15-06-54
  done: false
  episode_len_mean: 99.21
  episode_media: {}
  episode_reward_max: 345.0
  episode_reward_mean: 99.21
  episode_reward_min: 11.0
  episodes_this_iter: 20
  episodes_total: 327
  experiment_id: 7d72bfe4c7af4135afdb29472b95b747
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.5522253513336182
          entropy_coeff: 0.0
          kl: 0.0071579948998987675
          model: {}
          policy_loss: -0.018017884343862534
          total_loss: 9.474190711975098
          vf_explained_var: 0.037355903536081314
          vf_loss: 9.490777015686035
        train: null

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,6.0,22.3411,24000.0,163.66,500.0,11.0,163.66
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,5.0,18.3884,20000.0,130.12,500.0,10.0,130.12
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,5.0,18.7833,20000.0,130.37,445.0,10.0,130.37
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,5.0,22.8114,20000.0,132.27,357.0,11.0,132.27
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00001:
  agent_timesteps_total: 28000
  counters:
    num_agent_steps_sampled: 28000
    num_agent_steps_trained: 28000
    num_env_steps_sampled: 28000
    num_env_steps_trained: 28000
  custom_metrics: {}
  date: 2022-07-30_15-07-02
  done: false
  episode_len_mean: 197.62
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 197.62
  episode_reward_min: 16.0
  episodes_this_iter: 14
  episodes_total: 379
  experiment_id: 56b0ac4ea0c44796871555a64d0eb49a
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.07500000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.5401324033737183
          entropy_coeff: 0.0
          kl: 0.008230281062424183
          model: {}
          policy_loss: -0.014089838601648808
          total_loss: 9.528433799743652
          vf_explained_var: 0.10691628605127335
          vf_loss: 9.54190635

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,7.0,26.0901,28000.0,194.82,500.0,18.0,194.82
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,7.0,25.8843,28000.0,197.62,500.0,16.0,197.62
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,7.0,25.8505,28000.0,197.68,500.0,25.0,197.68
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,6.0,27.5579,24000.0,164.54,500.0,11.0,164.54
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00001:
  agent_timesteps_total: 36000
  counters:
    num_agent_steps_sampled: 36000
    num_agent_steps_trained: 36000
    num_env_steps_sampled: 36000
    num_env_steps_trained: 36000
  custom_metrics: {}
  date: 2022-07-30_15-07-10
  done: false
  episode_len_mean: 258.83
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 258.83
  episode_reward_min: 16.0
  episodes_this_iter: 8
  episodes_total: 397
  experiment_id: 56b0ac4ea0c44796871555a64d0eb49a
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.07500000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.5081638097763062
          entropy_coeff: 0.0
          kl: 0.003305105958133936
          model: {}
          policy_loss: -0.01240626722574234
          total_loss: 9.629762649536133
          vf_explained_var: 0.11821103096008301
          vf_loss: 9.6419210433

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,9.0,33.153,36000.0,251.57,500.0,18.0,251.57
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,9.0,33.0345,36000.0,258.83,500.0,16.0,258.83
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,9.0,32.7907,36000.0,257.3,500.0,30.0,257.3
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,7.0,32.4436,28000.0,199.85,500.0,11.0,199.85
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00006:
  agent_timesteps_total: 32000
  counters:
    num_agent_steps_sampled: 32000
    num_agent_steps_trained: 32000
    num_env_steps_sampled: 32000
    num_env_steps_trained: 32000
  custom_metrics: {}
  date: 2022-07-30_15-07-13
  done: false
  episode_len_mean: 232.2
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 232.2
  episode_reward_min: 20.0
  episodes_this_iter: 13
  episodes_total: 374
  experiment_id: 7d72bfe4c7af4135afdb29472b95b747
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.5037229657173157
          entropy_coeff: 0.0
          kl: 0.0072379386983811855
          model: {}
          policy_loss: -0.014310326427221298
          total_loss: 9.511494636535645
          vf_explained_var: 0.2140815258026123
          vf_loss: 9.524357795715332
        train: null
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,11.0,40.3682,44000.0,308.45,500.0,39.0,308.45
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,11.0,40.3929,44000.0,322.18,500.0,16.0,322.18
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,10.0,36.653,40000.0,281.72,500.0,73.0,281.72
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,9.0,42.2161,36000.0,259.78,500.0,20.0,259.78
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00002:
  agent_timesteps_total: 44000
  counters:
    num_agent_steps_sampled: 44000
    num_agent_steps_trained: 44000
    num_env_steps_sampled: 44000
    num_env_steps_trained: 44000
  custom_metrics: {}
  date: 2022-07-30_15-07-19
  done: false
  episode_len_mean: 304.42
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 304.42
  episode_reward_min: 82.0
  episodes_this_iter: 9
  episodes_total: 422
  experiment_id: efef1a2bc4c54296b5efbb75abba9fd0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.01875000074505806
          cur_lr: 4.999999873689376e-05
          entropy: 0.49556034803390503
          entropy_coeff: 0.0
          kl: 0.006505100056529045
          model: {}
          policy_loss: -0.010909529402852058
          total_loss: 9.432598114013672
          vf_explained_var: 0.173780620098114
          vf_loss: 9.4433870315

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,12.0,44.111,48000.0,335.8,500.0,46.0,335.8
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,12.0,44.0513,48000.0,353.48,500.0,29.0,353.48
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,11.0,40.1913,44000.0,304.42,500.0,82.0,304.42
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,10.0,47.0953,40000.0,292.46,500.0,20.0,292.46
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00001:
  agent_timesteps_total: 52000
  counters:
    num_agent_steps_sampled: 52000
    num_agent_steps_trained: 52000
    num_env_steps_sampled: 52000
    num_env_steps_trained: 52000
  custom_metrics: {}
  date: 2022-07-30_15-07-25
  done: false
  episode_len_mean: 378.9
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 378.9
  episode_reward_min: 29.0
  episodes_this_iter: 9
  episodes_total: 430
  experiment_id: 56b0ac4ea0c44796871555a64d0eb49a
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.01875000074505806
          cur_lr: 4.999999873689376e-05
          entropy: 0.4979231655597687
          entropy_coeff: 0.0
          kl: 0.0016054321313276887
          model: {}
          policy_loss: -0.007211345247924328
          total_loss: 9.462329864501953
          vf_explained_var: 0.049633730202913284
          vf_loss: 9.469511032

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,13.0,48.0035,52000.0,359.8,500.0,46.0,359.8
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,14.0,51.7467,56000.0,402.21,500.0,29.0,402.21
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,13.0,47.9173,52000.0,349.35,500.0,82.0,349.35
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,11.0,51.9834,44000.0,322.52,500.0,20.0,322.52
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 60000
  counters:
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_env_steps_sampled: 60000
    num_env_steps_trained: 60000
  custom_metrics: {}
  date: 2022-07-30_15-07-32
  done: false
  episode_len_mean: 388.14
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 388.14
  episode_reward_min: 46.0
  episodes_this_iter: 8
  episodes_total: 461
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.00937500037252903
          cur_lr: 4.999999873689376e-05
          entropy: 0.5415938496589661
          entropy_coeff: 0.0
          kl: 0.0057192351669073105
          model: {}
          policy_loss: -0.0090967807918787
          total_loss: 9.480977058410645
          vf_explained_var: -0.010587438009679317
          vf_loss: 9.49001979

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,15.0,55.7038,60000.0,388.14,500.0,46.0,388.14
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,15.0,55.944,60000.0,416.62,500.0,29.0,416.62
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,14.0,51.9389,56000.0,366.97,500.0,82.0,366.97
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,12.0,57.3112,48000.0,357.18,500.0,67.0,357.18
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00002:
  agent_timesteps_total: 60000
  counters:
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_env_steps_sampled: 60000
    num_env_steps_trained: 60000
  custom_metrics: {}
  date: 2022-07-30_15-07-35
  done: false
  episode_len_mean: 383.04
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 383.04
  episode_reward_min: 82.0
  episodes_this_iter: 9
  episodes_total: 456
  experiment_id: efef1a2bc4c54296b5efbb75abba9fd0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.004687500186264515
          cur_lr: 4.999999873689376e-05
          entropy: 0.4568626284599304
          entropy_coeff: 0.0
          kl: 0.00663238950073719
          model: {}
          policy_loss: -0.005149142816662788
          total_loss: 9.250727653503418
          vf_explained_var: 0.041431084275245667
          vf_loss: 9.25584602

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,16.0,59.8047,64000.0,404.19,500.0,46.0,404.19
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,16.0,59.7233,64000.0,429.97,500.0,29.0,429.97
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,15.0,56.0192,60000.0,383.04,500.0,82.0,383.04
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,13.0,62.6075,52000.0,377.92,500.0,67.0,377.92
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00001:
  agent_timesteps_total: 68000
  counters:
    num_agent_steps_sampled: 68000
    num_agent_steps_trained: 68000
    num_env_steps_sampled: 68000
    num_env_steps_trained: 68000
  custom_metrics: {}
  date: 2022-07-30_15-07-40
  done: false
  episode_len_mean: 434.14
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 434.14
  episode_reward_min: 29.0
  episodes_this_iter: 10
  episodes_total: 467
  experiment_id: 56b0ac4ea0c44796871555a64d0eb49a
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0023437500931322575
          cur_lr: 4.999999873689376e-05
          entropy: 0.4818911552429199
          entropy_coeff: 0.0
          kl: 0.005488591734319925
          model: {}
          policy_loss: -0.0030224050860852003
          total_loss: 9.230051040649414
          vf_explained_var: 0.014185864478349686
          vf_loss: 9.2330

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,17.0,63.8132,68000.0,423.0,500.0,46.0,423.0
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,17.0,63.5949,68000.0,434.14,500.0,29.0,434.14
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,17.0,63.6335,68000.0,412.63,500.0,82.0,412.63
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,14.0,68.0257,56000.0,397.41,500.0,116.0,397.41
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 76000
  counters:
    num_agent_steps_sampled: 76000
    num_agent_steps_trained: 76000
    num_env_steps_sampled: 76000
    num_env_steps_trained: 76000
  custom_metrics: {}
  date: 2022-07-30_15-07-48
  done: false
  episode_len_mean: 462.1
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 462.1
  episode_reward_min: 46.0
  episodes_this_iter: 8
  episodes_total: 493
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.004687500186264515
          cur_lr: 4.999999873689376e-05
          entropy: 0.5006824731826782
          entropy_coeff: 0.0
          kl: 0.0018642317736521363
          model: {}
          policy_loss: -0.003575832350179553
          total_loss: 9.262001037597656
          vf_explained_var: -0.0062907966785132885
          vf_loss: 9.265568

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,19.0,71.5722,76000.0,462.1,500.0,46.0,462.1
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,19.0,71.7101,76000.0,447.43,500.0,266.0,447.43
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,18.0,67.2427,72000.0,426.02,500.0,150.0,426.02
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,15.0,73.2347,60000.0,413.91,500.0,116.0,413.91
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00002:
  agent_timesteps_total: 76000
  counters:
    num_agent_steps_sampled: 76000
    num_agent_steps_trained: 76000
    num_env_steps_sampled: 76000
    num_env_steps_trained: 76000
  custom_metrics: {}
  date: 2022-07-30_15-07-50
  done: false
  episode_len_mean: 434.12
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 434.12
  episode_reward_min: 150.0
  episodes_this_iter: 9
  episodes_total: 489
  experiment_id: efef1a2bc4c54296b5efbb75abba9fd0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.004687500186264515
          cur_lr: 4.999999873689376e-05
          entropy: 0.4650416076183319
          entropy_coeff: 0.0
          kl: 0.002229135250672698
          model: {}
          policy_loss: -0.00039048847975209355
          total_loss: 8.558895111083984
          vf_explained_var: 0.07150112837553024
          vf_loss: 8.55927

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,20.0,75.6493,80000.0,471.98,500.0,180.0,471.98
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,20.0,75.5301,80000.0,442.62,500.0,266.0,442.62
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,20.0,74.7754,80000.0,437.88,500.0,150.0,437.88
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,15.0,73.2347,60000.0,413.91,500.0,116.0,413.91
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00006:
  agent_timesteps_total: 64000
  counters:
    num_agent_steps_sampled: 64000
    num_agent_steps_trained: 64000
    num_env_steps_sampled: 64000
    num_env_steps_trained: 64000
  custom_metrics: {}
  date: 2022-07-30_15-07-55
  done: false
  episode_len_mean: 427.61
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 427.61
  episode_reward_min: 116.0
  episodes_this_iter: 8
  episodes_total: 443
  experiment_id: 7d72bfe4c7af4135afdb29472b95b747
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.4794973134994507
          entropy_coeff: 0.0
          kl: 0.0012003385927528143
          model: {}
          policy_loss: -0.006211066152900457
          total_loss: 9.308974266052246
          vf_explained_var: -0.013790040276944637
          vf_loss: 9.31494426727295
        train: nu

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,21.0,79.3342,84000.0,474.23,500.0,180.0,474.23
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,22.0,82.9529,88000.0,443.8,500.0,266.0,443.8
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,21.0,78.7441,84000.0,450.23,500.0,150.0,450.23
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,16.0,78.3496,64000.0,427.61,500.0,116.0,427.61
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00006:
  agent_timesteps_total: 68000
  counters:
    num_agent_steps_sampled: 68000
    num_agent_steps_trained: 68000
    num_env_steps_sampled: 68000
    num_env_steps_trained: 68000
  custom_metrics: {}
  date: 2022-07-30_15-08-00
  done: false
  episode_len_mean: 436.02
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 436.02
  episode_reward_min: 116.0
  episodes_this_iter: 9
  episodes_total: 452
  experiment_id: 7d72bfe4c7af4135afdb29472b95b747
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.45570987462997437
          entropy_coeff: 0.0
          kl: 0.002903582761064172
          model: {}
          policy_loss: -0.0040038395673036575
          total_loss: 9.171207427978516
          vf_explained_var: -0.0027161750476807356
          vf_loss: 9.174630165100098
        train:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,23.0,87.0963,92000.0,483.22,500.0,180.0,483.22
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,23.0,86.8061,92000.0,441.72,500.0,255.0,441.72
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,23.0,86.4126,92000.0,458.73,500.0,150.0,458.73
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,18.0,88.3142,72000.0,442.45,500.0,116.0,442.45
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00006:
  agent_timesteps_total: 76000
  counters:
    num_agent_steps_sampled: 76000
    num_agent_steps_trained: 76000
    num_env_steps_sampled: 76000
    num_env_steps_trained: 76000
  custom_metrics: {}
  date: 2022-07-30_15-08-10
  done: false
  episode_len_mean: 454.37
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 454.37
  episode_reward_min: 133.0
  episodes_this_iter: 9
  episodes_total: 469
  experiment_id: 7d72bfe4c7af4135afdb29472b95b747
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.4530555307865143
          entropy_coeff: 0.0
          kl: 0.003893149783834815
          model: {}
          policy_loss: -0.005552755203098059
          total_loss: 8.894270896911621
          vf_explained_var: -0.09376966953277588
          vf_loss: 8.899046897888184
        train: nul

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,24.0,91.0427,96000.0,484.52,500.0,180.0,484.52
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,25.0,94.6003,100000.0,443.09,500.0,255.0,443.09
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,24.0,90.3529,96000.0,455.37,500.0,150.0,455.37
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,19.0,93.5106,76000.0,454.37,500.0,133.0,454.37
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 100000
  counters:
    num_agent_steps_sampled: 100000
    num_agent_steps_trained: 100000
    num_env_steps_sampled: 100000
    num_env_steps_trained: 100000
  custom_metrics: {}
  date: 2022-07-30_15-08-12
  done: false
  episode_len_mean: 484.74
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 484.74
  episode_reward_min: 180.0
  episodes_this_iter: 8
  episodes_total: 542
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0001464843808207661
          cur_lr: 4.999999873689376e-05
          entropy: 0.4791499078273773
          entropy_coeff: 0.0
          kl: 0.007145566400140524
          model: {}
          policy_loss: -0.0007833411800675094
          total_loss: 2.6655361652374268
          vf_explained_var: -0.0003930793609470129
          vf_loss

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,26.0,98.7473,104000.0,487.68,500.0,180.0,487.68
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,26.0,98.2374,104000.0,444.99,500.0,255.0,444.99
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,26.0,97.9914,104000.0,457.03,500.0,150.0,457.03
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,20.0,98.671,80000.0,462.71,500.0,141.0,462.71
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00001:
  agent_timesteps_total: 108000
  counters:
    num_agent_steps_sampled: 108000
    num_agent_steps_trained: 108000
    num_env_steps_sampled: 108000
    num_env_steps_trained: 108000
  custom_metrics: {}
  date: 2022-07-30_15-08-19
  done: false
  episode_len_mean: 435.42
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 435.42
  episode_reward_min: 218.0
  episodes_this_iter: 12
  episodes_total: 558
  experiment_id: 56b0ac4ea0c44796871555a64d0eb49a
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0001464843808207661
          cur_lr: 4.999999873689376e-05
          entropy: 0.4669821262359619
          entropy_coeff: 0.0
          kl: 0.010500610806047916
          model: {}
          policy_loss: -0.0050508007407188416
          total_loss: 3.252166271209717
          vf_explained_var: 0.192216694355011
          vf_loss: 3.2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,27.0,102.588,108000.0,492.24,500.0,180.0,492.24
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,28.0,105.98,112000.0,442.11,500.0,218.0,442.11
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,27.0,101.811,108000.0,461.94,500.0,178.0,461.94
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,21.0,103.883,84000.0,464.34,500.0,141.0,464.34
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00006:
  agent_timesteps_total: 88000
  counters:
    num_agent_steps_sampled: 88000
    num_agent_steps_trained: 88000
    num_env_steps_sampled: 88000
    num_env_steps_trained: 88000
  custom_metrics: {}
  date: 2022-07-30_15-08-26
  done: false
  episode_len_mean: 463.07
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 463.07
  episode_reward_min: 141.0
  episodes_this_iter: 8
  episodes_total: 494
  experiment_id: 7d72bfe4c7af4135afdb29472b95b747
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.49026909470558167
          entropy_coeff: 0.0
          kl: 0.0035129643511027098
          model: {}
          policy_loss: -0.0021470270585268736
          total_loss: 3.5956432819366455
          vf_explained_var: -0.1060609444975853
          vf_loss: 3.597087860107422
        train: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,29.0,110.36,116000.0,495.89,500.0,180.0,495.89
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,29.0,110.067,116000.0,452.06,500.0,218.0,452.06
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,29.0,109.957,116000.0,462.23,500.0,178.0,462.23
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,22.0,109.365,88000.0,463.07,500.0,141.0,463.07
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00006:
  agent_timesteps_total: 92000
  counters:
    num_agent_steps_sampled: 92000
    num_agent_steps_trained: 92000
    num_env_steps_sampled: 92000
    num_env_steps_trained: 92000
  custom_metrics: {}
  date: 2022-07-30_15-08-31
  done: false
  episode_len_mean: 453.63
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 453.63
  episode_reward_min: 141.0
  episodes_this_iter: 10
  episodes_total: 504
  experiment_id: 7d72bfe4c7af4135afdb29472b95b747
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.4985945522785187
          entropy_coeff: 0.0
          kl: 0.0026876942720264196
          model: {}
          policy_loss: -0.0006232843734323978
          total_loss: 3.5807061195373535
          vf_explained_var: 0.011990350671112537
          vf_loss: 3.580791711807251
        train:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,31.0,117.635,124000.0,496.8,500.0,180.0,496.8
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,31.0,117.041,124000.0,463.74,500.0,218.0,463.74
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,30.0,113.416,120000.0,457.54,500.0,178.0,457.54
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,24.0,119.52,96000.0,453.18,500.0,141.0,453.18
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00002:
  agent_timesteps_total: 124000
  counters:
    num_agent_steps_sampled: 124000
    num_agent_steps_trained: 124000
    num_env_steps_sampled: 124000
    num_env_steps_trained: 124000
  custom_metrics: {}
  date: 2022-07-30_15-08-37
  done: false
  episode_len_mean: 465.07
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 465.07
  episode_reward_min: 178.0
  episodes_this_iter: 8
  episodes_total: 592
  experiment_id: efef1a2bc4c54296b5efbb75abba9fd0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 3.662109520519152e-05
          cur_lr: 4.999999873689376e-05
          entropy: 0.40870198607444763
          entropy_coeff: 0.0
          kl: 0.0019814367406070232
          model: {}
          policy_loss: 0.0008364446694031358
          total_loss: 1.4008699655532837
          vf_explained_var: -0.14611540734767914
          vf_loss:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,32.0,121.118,128000.0,496.8,500.0,180.0,496.8
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,33.0,124.447,132000.0,472.73,500.0,218.0,472.73
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,32.0,120.925,128000.0,481.6,500.0,193.0,481.6
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,25.0,124.404,100000.0,453.16,500.0,141.0,453.16
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 132000
  counters:
    num_agent_steps_sampled: 132000
    num_agent_steps_trained: 132000
    num_env_steps_sampled: 132000
    num_env_steps_trained: 132000
  custom_metrics: {}
  date: 2022-07-30_15-08-42
  done: false
  episode_len_mean: 500.0
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 500.0
  episode_reward_min: 500.0
  episodes_this_iter: 8
  episodes_total: 606
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.831054760259576e-05
          cur_lr: 4.999999873689376e-05
          entropy: 0.49490001797676086
          entropy_coeff: 0.0
          kl: 0.007235722616314888
          model: {}
          policy_loss: -0.0006425747997127473
          total_loss: 2.307827949523926
          vf_explained_var: -0.1025111973285675
          vf_loss: 2.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,34.0,127.946,136000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,34.0,128.008,136000.0,473.2,500.0,218.0,473.2
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,34.0,128.354,136000.0,459.03,500.0,224.0,459.03
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,26.0,129.233,104000.0,453.42,500.0,199.0,453.42
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 140000
  counters:
    num_agent_steps_sampled: 140000
    num_agent_steps_trained: 140000
    num_env_steps_sampled: 140000
    num_env_steps_trained: 140000
  custom_metrics: {}
  date: 2022-07-30_15-08-49
  done: false
  episode_len_mean: 500.0
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 500.0
  episode_reward_min: 500.0
  episodes_this_iter: 8
  episodes_total: 622
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 9.15527380129788e-06
          cur_lr: 4.999999873689376e-05
          entropy: 0.47934195399284363
          entropy_coeff: 0.0
          kl: 0.009711275808513165
          model: {}
          policy_loss: 0.00017767157987691462
          total_loss: 2.3086466789245605
          vf_explained_var: -0.47225356101989746
          vf_loss: 2.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,36.0,135.027,144000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,36.0,135.558,144000.0,471.62,500.0,218.0,471.62
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,35.0,132.241,140000.0,456.27,500.0,164.0,456.27
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,28.0,138.784,112000.0,441.74,500.0,194.0,441.74
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 148000
  counters:
    num_agent_steps_sampled: 148000
    num_agent_steps_trained: 148000
    num_env_steps_sampled: 148000
    num_env_steps_trained: 148000
  custom_metrics: {}
  date: 2022-07-30_15-08-56
  done: false
  episode_len_mean: 500.0
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 500.0
  episode_reward_min: 500.0
  episodes_this_iter: 8
  episodes_total: 638
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 4.57763690064894e-06
          cur_lr: 4.999999873689376e-05
          entropy: 0.5157212615013123
          entropy_coeff: 0.0
          kl: 0.0025701404083520174
          model: {}
          policy_loss: 0.0030259571503847837
          total_loss: 2.3114945888519287
          vf_explained_var: -0.11422785371541977
          vf_loss: 2.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,38.0,142.448,152000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,38.0,143.116,152000.0,475.1,500.0,218.0,475.1
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,37.0,139.708,148000.0,451.58,500.0,154.0,451.58
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,29.0,143.606,116000.0,442.08,500.0,194.0,442.08
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 156000
  counters:
    num_agent_steps_sampled: 156000
    num_agent_steps_trained: 156000
    num_env_steps_sampled: 156000
    num_env_steps_trained: 156000
  custom_metrics: {}
  date: 2022-07-30_15-09-04
  done: false
  episode_len_mean: 500.0
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 500.0
  episode_reward_min: 500.0
  episodes_this_iter: 8
  episodes_total: 654
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.144409225162235e-06
          cur_lr: 4.999999873689376e-05
          entropy: 0.5100549459457397
          entropy_coeff: 0.0
          kl: 0.0053430902771651745
          model: {}
          policy_loss: 0.0006090127280913293
          total_loss: 2.30907940864563
          vf_explained_var: -0.11138387024402618
          vf_loss: 2.30

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,39.0,146.282,156000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,39.0,146.677,156000.0,488.81,500.0,286.0,488.81
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,39.0,147.307,156000.0,444.44,500.0,154.0,444.44
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,30.0,148.377,120000.0,440.77,500.0,194.0,440.77
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00006:
  agent_timesteps_total: 124000
  counters:
    num_agent_steps_sampled: 124000
    num_agent_steps_trained: 124000
    num_env_steps_sampled: 124000
    num_env_steps_trained: 124000
  custom_metrics: {}
  date: 2022-07-30_15-09-10
  done: false
  episode_len_mean: 439.82
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 439.82
  episode_reward_min: 194.0
  episodes_this_iter: 8
  episodes_total: 577
  experiment_id: 7d72bfe4c7af4135afdb29472b95b747
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.47158995270729065
          entropy_coeff: 0.0
          kl: 0.003354985499754548
          model: {}
          policy_loss: -9.363171557197347e-05
          total_loss: 1.79887855052948
          vf_explained_var: -0.48240864276885986
          vf_loss: 1.798301100730896
        trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,42.0,157.974,168000.0,498.98,500.0,398.0,498.98
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,41.0,154.837,164000.0,494.69,500.0,286.0,494.69
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,40.0,151.397,160000.0,446.59,500.0,154.0,446.59
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,31.0,153.353,124000.0,439.82,500.0,194.0,439.82
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00006:
  agent_timesteps_total: 128000
  counters:
    num_agent_steps_sampled: 128000
    num_agent_steps_trained: 128000
    num_env_steps_sampled: 128000
    num_env_steps_trained: 128000
  custom_metrics: {}
  date: 2022-07-30_15-09-16
  done: false
  episode_len_mean: 440.5
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 440.5
  episode_reward_min: 194.0
  episodes_this_iter: 9
  episodes_total: 586
  experiment_id: 7d72bfe4c7af4135afdb29472b95b747
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.46357032656669617
          entropy_coeff: 0.0
          kl: 0.0021909098140895367
          model: {}
          policy_loss: 0.0008951566414907575
          total_loss: 1.6747537851333618
          vf_explained_var: -0.28913336992263794
          vf_loss: 1.6734204292297363
        tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,43.0,162.431,172000.0,498.7,500.0,398.0,498.7
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,43.0,163.017,172000.0,494.69,500.0,286.0,494.69
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,42.0,159.738,168000.0,446.9,500.0,154.0,446.9
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,32.0,159.016,128000.0,440.5,500.0,194.0,440.5
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00006:
  agent_timesteps_total: 132000
  counters:
    num_agent_steps_sampled: 132000
    num_agent_steps_trained: 132000
    num_env_steps_sampled: 132000
    num_env_steps_trained: 132000
  custom_metrics: {}
  date: 2022-07-30_15-09-22
  done: false
  episode_len_mean: 440.64
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 440.64
  episode_reward_min: 194.0
  episodes_this_iter: 8
  episodes_total: 594
  experiment_id: 7d72bfe4c7af4135afdb29472b95b747
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.4541437029838562
          entropy_coeff: 0.0
          kl: 0.002690145978704095
          model: {}
          policy_loss: -0.00011200379231013358
          total_loss: 1.754665732383728
          vf_explained_var: -0.4493090808391571
          vf_loss: 1.754239797592163
        trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,44.0,166.522,176000.0,492.95,500.0,275.0,492.95
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,44.0,167.084,176000.0,494.69,500.0,286.0,494.69
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,43.0,163.731,172000.0,447.76,500.0,154.0,447.76
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,34.0,169.731,136000.0,452.16,500.0,194.0,452.16
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 180000
  counters:
    num_agent_steps_sampled: 180000
    num_agent_steps_trained: 180000
    num_env_steps_sampled: 180000
    num_env_steps_trained: 180000
  custom_metrics: {}
  date: 2022-07-30_15-09-28
  done: false
  episode_len_mean: 492.95
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 492.95
  episode_reward_min: 275.0
  episodes_this_iter: 8
  episodes_total: 704
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 5.722046125811175e-07
          cur_lr: 4.999999873689376e-05
          entropy: 0.5054978728294373
          entropy_coeff: 0.0
          kl: 0.004673794377595186
          model: {}
          policy_loss: 0.0020188605412840843
          total_loss: 2.2600855827331543
          vf_explained_var: -0.13697241246700287
          vf_loss: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,46.0,174.046,184000.0,484.8,500.0,275.0,484.8
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,45.0,170.957,180000.0,491.06,500.0,286.0,491.06
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,44.0,167.451,176000.0,454.64,500.0,154.0,454.64
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,35.0,174.489,140000.0,455.82,500.0,194.0,455.82
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00002:
  agent_timesteps_total: 180000
  counters:
    num_agent_steps_sampled: 180000
    num_agent_steps_trained: 180000
    num_env_steps_sampled: 180000
    num_env_steps_trained: 180000
  custom_metrics: {}
  date: 2022-07-30_15-09-32
  done: false
  episode_len_mean: 466.58
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 466.58
  episode_reward_min: 154.0
  episodes_this_iter: 8
  episodes_total: 715
  experiment_id: efef1a2bc4c54296b5efbb75abba9fd0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 3.5762788286319847e-08
          cur_lr: 4.999999873689376e-05
          entropy: 0.35232555866241455
          entropy_coeff: 0.0
          kl: 0.0034403332974761724
          model: {}
          policy_loss: 0.001133372657932341
          total_loss: 1.2924171686172485
          vf_explained_var: -0.4705381989479065
          vf_loss: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,47.0,177.875,188000.0,484.8,500.0,275.0,484.8
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,47.0,178.23,188000.0,488.49,500.0,286.0,488.49
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,47.0,178.571,188000.0,475.21,500.0,154.0,475.21
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,36.0,179.627,144000.0,465.22,500.0,194.0,465.22
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00006:
  agent_timesteps_total: 148000
  counters:
    num_agent_steps_sampled: 148000
    num_agent_steps_trained: 148000
    num_env_steps_sampled: 148000
    num_env_steps_trained: 148000
  custom_metrics: {}
  date: 2022-07-30_15-09-42
  done: false
  episode_len_mean: 468.14
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 468.14
  episode_reward_min: 194.0
  episodes_this_iter: 8
  episodes_total: 626
  experiment_id: 7d72bfe4c7af4135afdb29472b95b747
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.38611599802970886
          entropy_coeff: 0.0
          kl: 0.0047597214579582214
          model: {}
          policy_loss: -3.4767068427754566e-05
          total_loss: 1.7550112009048462
          vf_explained_var: -0.5216849446296692
          vf_loss: 1.754094123840332
        t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,49.0,185.708,196000.0,480.72,500.0,275.0,480.72
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,49.0,185.929,196000.0,493.8,500.0,340.0,493.8
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,48.0,182.586,192000.0,475.92,500.0,154.0,475.92
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,38.0,189.816,152000.0,470.46,500.0,194.0,470.46
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00002:
  agent_timesteps_total: 196000
  counters:
    num_agent_steps_sampled: 196000
    num_agent_steps_trained: 196000
    num_env_steps_sampled: 196000
    num_env_steps_trained: 196000
  custom_metrics: {}
  date: 2022-07-30_15-09-47
  done: false
  episode_len_mean: 482.17
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 482.17
  episode_reward_min: 209.0
  episodes_this_iter: 8
  episodes_total: 748
  experiment_id: efef1a2bc4c54296b5efbb75abba9fd0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.2351742678949904e-09
          cur_lr: 4.999999873689376e-05
          entropy: 0.34984806180000305
          entropy_coeff: 0.0
          kl: 0.0035990963224321604
          model: {}
          policy_loss: 0.001084132818505168
          total_loss: 1.9871070384979248
          vf_explained_var: -0.4778865575790405
          vf_loss: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,51.0,193.175,204000.0,467.62,500.0,275.0,467.62
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,51.0,193.575,204000.0,491.85,500.0,340.0,491.85
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,50.0,190.109,200000.0,482.17,500.0,209.0,482.17
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,39.0,194.832,156000.0,483.44,500.0,233.0,483.44
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00002:
  agent_timesteps_total: 204000
  counters:
    num_agent_steps_sampled: 204000
    num_agent_steps_trained: 204000
    num_env_steps_sampled: 204000
    num_env_steps_trained: 204000
  custom_metrics: {}
  date: 2022-07-30_15-09-54
  done: false
  episode_len_mean: 487.67
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 487.67
  episode_reward_min: 250.0
  episodes_this_iter: 8
  episodes_total: 764
  experiment_id: efef1a2bc4c54296b5efbb75abba9fd0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.1175871339474952e-09
          cur_lr: 4.999999873689376e-05
          entropy: 0.34238800406455994
          entropy_coeff: 0.0
          kl: 0.0035917269997298717
          model: {}
          policy_loss: 0.0015024642925709486
          total_loss: 1.987428903579712
          vf_explained_var: -0.48386499285697937
          vf_loss:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,52.0,197.078,208000.0,460.9,500.0,275.0,460.9
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,52.0,197.366,208000.0,491.85,500.0,340.0,491.85
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,52.0,197.272,208000.0,489.31,500.0,250.0,489.31
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,40.0,199.808,160000.0,475.78,500.0,159.0,475.78
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 212000
  counters:
    num_agent_steps_sampled: 212000
    num_agent_steps_trained: 212000
    num_env_steps_sampled: 212000
    num_env_steps_trained: 212000
  custom_metrics: {}
  date: 2022-07-30_15-09-59
  done: false
  episode_len_mean: 439.55
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 439.55
  episode_reward_min: 234.0
  episodes_this_iter: 13
  episodes_total: 779
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.4305115314527939e-07
          cur_lr: 4.999999873689376e-05
          entropy: 0.44987577199935913
          entropy_coeff: 0.0
          kl: 0.004981638863682747
          model: {}
          policy_loss: 0.002991950139403343
          total_loss: 3.5513837337493896
          vf_explained_var: -0.19113101065158844
          vf_loss:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,54.0,204.561,216000.0,426.82,500.0,234.0,426.82
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,54.0,204.826,216000.0,488.27,500.0,340.0,488.27
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,54.0,204.643,216000.0,493.69,500.0,345.0,493.69
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,41.0,204.929,164000.0,476.16,500.0,159.0,476.16
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 220000
  counters:
    num_agent_steps_sampled: 220000
    num_agent_steps_trained: 220000
    num_env_steps_sampled: 220000
    num_env_steps_trained: 220000
  custom_metrics: {}
  date: 2022-07-30_15-10-06
  done: false
  episode_len_mean: 428.3
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 428.3
  episode_reward_min: 234.0
  episodes_this_iter: 8
  episodes_total: 798
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 7.152557657263969e-08
          cur_lr: 4.999999873689376e-05
          entropy: 0.4882587492465973
          entropy_coeff: 0.0
          kl: 0.0039037009701132774
          model: {}
          policy_loss: -0.00022657058434560895
          total_loss: 1.5521928071975708
          vf_explained_var: -0.2817687392234802
          vf_loss: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,56.0,211.968,224000.0,428.95,500.0,234.0,428.95
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,56.0,212.202,224000.0,485.75,500.0,340.0,485.75
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,55.0,208.28,220000.0,493.69,500.0,345.0,493.69
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,43.0,214.523,172000.0,480.04,500.0,159.0,480.04
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00001:
  agent_timesteps_total: 228000
  counters:
    num_agent_steps_sampled: 228000
    num_agent_steps_trained: 228000
    num_env_steps_sampled: 228000
    num_env_steps_trained: 228000
  custom_metrics: {}
  date: 2022-07-30_15-10-14
  done: false
  episode_len_mean: 487.78
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 487.78
  episode_reward_min: 340.0
  episodes_this_iter: 8
  episodes_total: 802
  experiment_id: 56b0ac4ea0c44796871555a64d0eb49a
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 8.731149483964806e-12
          cur_lr: 4.999999873689376e-05
          entropy: 0.3622951805591583
          entropy_coeff: 0.0
          kl: 0.004240069538354874
          model: {}
          policy_loss: -0.0013032687129452825
          total_loss: 1.7930530309677124
          vf_explained_var: -0.0021530501544475555
          vf_loss

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,57.0,215.959,228000.0,428.19,500.0,226.0,428.19
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,57.0,215.797,228000.0,487.78,500.0,340.0,487.78
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,57.0,215.733,228000.0,493.69,500.0,345.0,493.69
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,44.0,219.688,176000.0,481.66,500.0,159.0,481.66
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00001:
  agent_timesteps_total: 236000
  counters:
    num_agent_steps_sampled: 236000
    num_agent_steps_trained: 236000
    num_env_steps_sampled: 236000
    num_env_steps_trained: 236000
  custom_metrics: {}
  date: 2022-07-30_15-10-21
  done: false
  episode_len_mean: 478.06
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 478.06
  episode_reward_min: 215.0
  episodes_this_iter: 8
  episodes_total: 821
  experiment_id: 56b0ac4ea0c44796871555a64d0eb49a
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 4.365574741982403e-12
          cur_lr: 4.999999873689376e-05
          entropy: 0.35470253229141235
          entropy_coeff: 0.0
          kl: 0.003228693036362529
          model: {}
          policy_loss: -0.0008859855006448925
          total_loss: 2.267261266708374
          vf_explained_var: -0.3385913074016571
          vf_loss: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,59.0,223.661,236000.0,424.71,500.0,226.0,424.71
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,59.0,223.232,236000.0,478.06,500.0,215.0,478.06
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,58.0,219.12,232000.0,496.63,500.0,345.0,496.63
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,45.0,224.756,180000.0,482.12,500.0,159.0,482.12
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00002:
  agent_timesteps_total: 236000
  counters:
    num_agent_steps_sampled: 236000
    num_agent_steps_trained: 236000
    num_env_steps_sampled: 236000
    num_env_steps_trained: 236000
  custom_metrics: {}
  date: 2022-07-30_15-10-24
  done: false
  episode_len_mean: 500.0
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 500.0
  episode_reward_min: 500.0
  episodes_this_iter: 8
  episodes_total: 828
  experiment_id: efef1a2bc4c54296b5efbb75abba9fd0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 4.365574741982403e-12
          cur_lr: 4.999999873689376e-05
          entropy: 0.34013038873672485
          entropy_coeff: 0.0
          kl: 0.003819853998720646
          model: {}
          policy_loss: 0.0013198815286159515
          total_loss: 1.9872167110443115
          vf_explained_var: -0.4782232344150543
          vf_loss: 1.9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,60.0,227.633,240000.0,429.52,500.0,226.0,429.52
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,60.0,227.196,240000.0,475.95,500.0,215.0,475.95
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,59.0,222.753,236000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,46.0,229.997,184000.0,482.0,500.0,159.0,482.0
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00001:
  agent_timesteps_total: 244000
  counters:
    num_agent_steps_sampled: 244000
    num_agent_steps_trained: 244000
    num_env_steps_sampled: 244000
    num_env_steps_trained: 244000
  custom_metrics: {}
  date: 2022-07-30_15-10-29
  done: false
  episode_len_mean: 469.92
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 469.92
  episode_reward_min: 215.0
  episodes_this_iter: 10
  episodes_total: 839
  experiment_id: 56b0ac4ea0c44796871555a64d0eb49a
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.0913936854956008e-12
          cur_lr: 4.999999873689376e-05
          entropy: 0.3404547870159149
          entropy_coeff: 0.0
          kl: 0.004082272760570049
          model: {}
          policy_loss: -0.0007814341806806624
          total_loss: 1.9473047256469727
          vf_explained_var: -0.2153106927871704
          vf_loss:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,61.0,231.389,244000.0,439.18,500.0,226.0,439.18
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,61.0,231.376,244000.0,469.92,500.0,215.0,469.92
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,61.0,230.871,244000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,47.0,235.586,188000.0,482.0,500.0,159.0,482.0
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00001:
  agent_timesteps_total: 252000
  counters:
    num_agent_steps_sampled: 252000
    num_agent_steps_trained: 252000
    num_env_steps_sampled: 252000
    num_env_steps_trained: 252000
  custom_metrics: {}
  date: 2022-07-30_15-10-37
  done: false
  episode_len_mean: 469.92
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 469.92
  episode_reward_min: 215.0
  episodes_this_iter: 8
  episodes_total: 855
  experiment_id: 56b0ac4ea0c44796871555a64d0eb49a
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.728484213739002e-13
          cur_lr: 4.999999873689376e-05
          entropy: 0.3517323434352875
          entropy_coeff: 0.0
          kl: 0.006291128695011139
          model: {}
          policy_loss: -0.0018924664473161101
          total_loss: 2.190648317337036
          vf_explained_var: -0.34912699460983276
          vf_loss: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,63.0,239.142,252000.0,443.52,500.0,226.0,443.52
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,63.0,239.042,252000.0,469.92,500.0,215.0,469.92
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,62.0,234.899,248000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,48.0,241.03,192000.0,480.49,500.0,159.0,480.49
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00002:
  agent_timesteps_total: 252000
  counters:
    num_agent_steps_sampled: 252000
    num_agent_steps_trained: 252000
    num_env_steps_sampled: 252000
    num_env_steps_trained: 252000
  custom_metrics: {}
  date: 2022-07-30_15-10-40
  done: false
  episode_len_mean: 500.0
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 500.0
  episode_reward_min: 500.0
  episodes_this_iter: 8
  episodes_total: 860
  experiment_id: efef1a2bc4c54296b5efbb75abba9fd0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.728484213739002e-13
          cur_lr: 4.999999873689376e-05
          entropy: 0.2902452051639557
          entropy_coeff: 0.0
          kl: 0.002780890790745616
          model: {}
          policy_loss: 0.0017548089381307364
          total_loss: 1.9876431226730347
          vf_explained_var: -0.47379007935523987
          vf_loss: 1.9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,64.0,243.175,256000.0,455.19,500.0,226.0,455.19
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,64.0,243.054,256000.0,468.2,500.0,215.0,468.2
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,64.0,242.114,256000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,49.0,246.198,196000.0,479.75,500.0,159.0,479.75
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00001:
  agent_timesteps_total: 260000
  counters:
    num_agent_steps_sampled: 260000
    num_agent_steps_trained: 260000
    num_env_steps_sampled: 260000
    num_env_steps_trained: 260000
  custom_metrics: {}
  date: 2022-07-30_15-10-45
  done: false
  episode_len_mean: 467.72
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 467.72
  episode_reward_min: 215.0
  episodes_this_iter: 8
  episodes_total: 871
  experiment_id: 56b0ac4ea0c44796871555a64d0eb49a
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.364242106869501e-13
          cur_lr: 4.999999873689376e-05
          entropy: 0.34474343061447144
          entropy_coeff: 0.0
          kl: 0.003334126900881529
          model: {}
          policy_loss: 0.001476871082559228
          total_loss: 2.1083319187164307
          vf_explained_var: -0.26152241230010986
          vf_loss: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,66.0,250.276,264000.0,480.06,500.0,226.0,480.06
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,66.0,250.605,264000.0,470.83,500.0,215.0,470.83
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,65.0,246.152,260000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,49.0,246.198,196000.0,479.75,500.0,159.0,479.75
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00006:
  agent_timesteps_total: 200000
  counters:
    num_agent_steps_sampled: 200000
    num_agent_steps_trained: 200000
    num_env_steps_sampled: 200000
    num_env_steps_trained: 200000
  custom_metrics: {}
  date: 2022-07-30_15-10-49
  done: false
  episode_len_mean: 483.76
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 483.76
  episode_reward_min: 159.0
  episodes_this_iter: 9
  episodes_total: 734
  experiment_id: 7d72bfe4c7af4135afdb29472b95b747
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.38782620429992676
          entropy_coeff: 0.0
          kl: 0.0025583691895008087
          model: {}
          policy_loss: -0.01694260910153389
          total_loss: 2.917051076889038
          vf_explained_var: -0.20869016647338867
          vf_loss: 2.9334821701049805
        tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,67.0,254.136,268000.0,482.08,500.0,226.0,482.08
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,67.0,254.278,268000.0,465.97,500.0,215.0,465.97
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,66.0,250.054,264000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,51.0,256.537,204000.0,483.76,500.0,159.0,483.76
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00002:
  agent_timesteps_total: 268000
  counters:
    num_agent_steps_sampled: 268000
    num_agent_steps_trained: 268000
    num_env_steps_sampled: 268000
    num_env_steps_trained: 268000
  custom_metrics: {}
  date: 2022-07-30_15-10-55
  done: false
  episode_len_mean: 500.0
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 500.0
  episode_reward_min: 500.0
  episodes_this_iter: 8
  episodes_total: 892
  experiment_id: efef1a2bc4c54296b5efbb75abba9fd0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.7053026335868762e-14
          cur_lr: 4.999999873689376e-05
          entropy: 0.28542956709861755
          entropy_coeff: 0.0
          kl: 0.0014651971869170666
          model: {}
          policy_loss: 0.0023097284138202667
          total_loss: 1.988202452659607
          vf_explained_var: -0.15397752821445465
          vf_loss: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,68.0,257.93,272000.0,483.38,500.0,226.0,483.38
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,68.0,258.153,272000.0,463.21,500.0,215.0,463.21
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,68.0,257.489,272000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,52.0,261.727,208000.0,488.67,500.0,225.0,488.67
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 276000
  counters:
    num_agent_steps_sampled: 276000
    num_agent_steps_trained: 276000
    num_env_steps_sampled: 276000
    num_env_steps_trained: 276000
  custom_metrics: {}
  date: 2022-07-30_15-11-00
  done: false
  episode_len_mean: 487.74
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 487.74
  episode_reward_min: 233.0
  episodes_this_iter: 8
  episodes_total: 914
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 4.470348535789981e-09
          cur_lr: 4.999999873689376e-05
          entropy: 0.418632447719574
          entropy_coeff: 0.0
          kl: 0.005761595908552408
          model: {}
          policy_loss: -6.372505595209077e-05
          total_loss: 2.6410655975341797
          vf_explained_var: -0.08555133640766144
          vf_loss: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,70.0,265.711,280000.0,492.81,500.0,233.0,492.81
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,70.0,265.997,280000.0,475.14,500.0,219.0,475.14
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,69.0,261.669,276000.0,499.77,500.0,477.0,499.77
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,52.0,261.727,208000.0,488.67,500.0,225.0,488.67
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00006:
  agent_timesteps_total: 212000
  counters:
    num_agent_steps_sampled: 212000
    num_agent_steps_trained: 212000
    num_env_steps_sampled: 212000
    num_env_steps_trained: 212000
  custom_metrics: {}
  date: 2022-07-30_15-11-05
  done: false
  episode_len_mean: 493.35
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 493.35
  episode_reward_min: 390.0
  episodes_this_iter: 8
  episodes_total: 758
  experiment_id: 7d72bfe4c7af4135afdb29472b95b747
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.3747876286506653
          entropy_coeff: 0.0
          kl: 0.0009082785109058022
          model: {}
          policy_loss: -0.014663881622254848
          total_loss: 2.4124605655670166
          vf_explained_var: -0.4260338842868805
          vf_loss: 2.426942825317383
        trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,71.0,269.65,284000.0,488.04,500.0,235.0,488.04
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,71.0,269.738,284000.0,468.82,500.0,219.0,468.82
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,70.0,265.797,280000.0,499.77,500.0,477.0,499.77
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,54.0,272.289,216000.0,493.18,500.0,390.0,493.18
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00002:
  agent_timesteps_total: 284000
  counters:
    num_agent_steps_sampled: 284000
    num_agent_steps_trained: 284000
    num_env_steps_sampled: 284000
    num_env_steps_trained: 284000
  custom_metrics: {}
  date: 2022-07-30_15-11-11
  done: false
  episode_len_mean: 496.8
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 496.8
  episode_reward_min: 392.0
  episodes_this_iter: 9
  episodes_total: 925
  experiment_id: efef1a2bc4c54296b5efbb75abba9fd0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.0658141459917976e-15
          cur_lr: 4.999999873689376e-05
          entropy: 0.33927756547927856
          entropy_coeff: 0.0
          kl: 0.006686536129564047
          model: {}
          policy_loss: -0.0017045161221176386
          total_loss: 1.4972409009933472
          vf_explained_var: 0.2408037781715393
          vf_loss: 1.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,72.0,273.878,288000.0,488.04,500.0,235.0,488.04
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,72.0,274.139,288000.0,473.65,500.0,238.0,473.65
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,71.0,269.918,284000.0,496.8,500.0,392.0,496.8
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,55.0,277.461,220000.0,492.28,500.0,349.0,492.28
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 292000
  counters:
    num_agent_steps_sampled: 292000
    num_agent_steps_trained: 292000
    num_env_steps_sampled: 292000
    num_env_steps_trained: 292000
  custom_metrics: {}
  date: 2022-07-30_15-11-17
  done: false
  episode_len_mean: 462.33
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 462.33
  episode_reward_min: 168.0
  episodes_this_iter: 13
  episodes_total: 953
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.2351742678949904e-09
          cur_lr: 4.999999873689376e-05
          entropy: 0.413785845041275
          entropy_coeff: 0.0
          kl: 0.005547886714339256
          model: {}
          policy_loss: 0.002054541604593396
          total_loss: 4.059516429901123
          vf_explained_var: 0.0007586145657114685
          vf_loss: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,73.0,278.249,292000.0,462.33,500.0,168.0,462.33
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,73.0,278.412,292000.0,471.2,500.0,238.0,471.2
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,73.0,277.849,292000.0,496.17,500.0,392.0,496.17
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,56.0,282.882,224000.0,492.29,500.0,342.0,492.29
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00001:
  agent_timesteps_total: 300000
  counters:
    num_agent_steps_sampled: 300000
    num_agent_steps_trained: 300000
    num_env_steps_sampled: 300000
    num_env_steps_trained: 300000
  custom_metrics: {}
  date: 2022-07-30_15-11-24
  done: false
  episode_len_mean: 473.15
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 473.15
  episode_reward_min: 238.0
  episodes_this_iter: 8
  episodes_total: 957
  experiment_id: 56b0ac4ea0c44796871555a64d0eb49a
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.0658141459917976e-15
          cur_lr: 4.999999873689376e-05
          entropy: 0.35933929681777954
          entropy_coeff: 0.0
          kl: 0.003290757769718766
          model: {}
          policy_loss: 0.002420681994408369
          total_loss: 1.968146562576294
          vf_explained_var: 0.09920774400234222
          vf_loss: 1.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,75.0,286.207,300000.0,447.86,500.0,168.0,447.86
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,75.0,285.615,300000.0,473.15,500.0,238.0,473.15
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,74.0,281.467,296000.0,496.17,500.0,392.0,496.17
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,57.0,287.988,228000.0,492.29,500.0,342.0,492.29
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00002:
  agent_timesteps_total: 300000
  counters:
    num_agent_steps_sampled: 300000
    num_agent_steps_trained: 300000
    num_env_steps_sampled: 300000
    num_env_steps_trained: 300000
  custom_metrics: {}
  date: 2022-07-30_15-11-27
  done: false
  episode_len_mean: 496.17
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 496.17
  episode_reward_min: 392.0
  episodes_this_iter: 8
  episodes_total: 957
  experiment_id: efef1a2bc4c54296b5efbb75abba9fd0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 5.329070729958988e-16
          cur_lr: 4.999999873689376e-05
          entropy: 0.2719748616218567
          entropy_coeff: 0.0
          kl: 0.001613154774531722
          model: {}
          policy_loss: 0.00174320035148412
          total_loss: 1.1514896154403687
          vf_explained_var: 0.23053470253944397
          vf_loss: 1.14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,76.0,290.311,304000.0,435.06,500.0,168.0,435.06
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,76.0,289.721,304000.0,476.86,500.0,238.0,476.86
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,75.0,285.534,300000.0,496.17,500.0,392.0,496.17
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,58.0,293.207,232000.0,493.06,500.0,342.0,493.06
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00001:
  agent_timesteps_total: 308000
  counters:
    num_agent_steps_sampled: 308000
    num_agent_steps_trained: 308000
    num_env_steps_sampled: 308000
    num_env_steps_trained: 308000
  custom_metrics: {}
  date: 2022-07-30_15-11-32
  done: false
  episode_len_mean: 476.86
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 476.86
  episode_reward_min: 238.0
  episodes_this_iter: 8
  episodes_total: 973
  experiment_id: 56b0ac4ea0c44796871555a64d0eb49a
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.664535364979494e-16
          cur_lr: 4.999999873689376e-05
          entropy: 0.352756142616272
          entropy_coeff: 0.0
          kl: 0.005146995652467012
          model: {}
          policy_loss: -0.0013983993558213115
          total_loss: 1.964328408241272
          vf_explained_var: -0.45161768794059753
          vf_loss: 1.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,77.0,294.411,308000.0,436.15,500.0,168.0,436.15
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,78.0,297.476,312000.0,477.57,500.0,238.0,477.57
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,77.0,293.688,308000.0,494.84,500.0,392.0,494.84
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,58.0,293.207,232000.0,493.06,500.0,342.0,493.06
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00006:
  agent_timesteps_total: 236000
  counters:
    num_agent_steps_sampled: 236000
    num_agent_steps_trained: 236000
    num_env_steps_sampled: 236000
    num_env_steps_trained: 236000
  custom_metrics: {}
  date: 2022-07-30_15-11-36
  done: false
  episode_len_mean: 493.01
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 493.01
  episode_reward_min: 342.0
  episodes_this_iter: 8
  episodes_total: 806
  experiment_id: 7d72bfe4c7af4135afdb29472b95b747
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.41344189643859863
          entropy_coeff: 0.0
          kl: 0.004319516010582447
          model: {}
          policy_loss: -8.309810073114932e-05
          total_loss: 1.777642011642456
          vf_explained_var: 0.1178378313779831
          vf_loss: 1.7768611907958984
        trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,79.0,302.709,316000.0,424.61,500.0,168.0,424.61
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,79.0,301.735,316000.0,482.67,500.0,238.0,482.67
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,78.0,297.584,312000.0,494.84,500.0,392.0,494.84
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,59.0,298.735,236000.0,493.01,500.0,342.0,493.01
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00006:
  agent_timesteps_total: 240000
  counters:
    num_agent_steps_sampled: 240000
    num_agent_steps_trained: 240000
    num_env_steps_sampled: 240000
    num_env_steps_trained: 240000
  custom_metrics: {}
  date: 2022-07-30_15-11-42
  done: false
  episode_len_mean: 493.42
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 493.42
  episode_reward_min: 342.0
  episodes_this_iter: 8
  episodes_total: 814
  experiment_id: 7d72bfe4c7af4135afdb29472b95b747
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.41264215111732483
          entropy_coeff: 0.0
          kl: 0.000980109442025423
          model: {}
          policy_loss: 0.0020575751550495625
          total_loss: 1.7664729356765747
          vf_explained_var: 0.1633954793214798
          vf_loss: 1.7642194032669067
        trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,80.0,306.834,320000.0,424.61,500.0,168.0,424.61
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,80.0,305.461,320000.0,485.43,500.0,238.0,485.43
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,80.0,305.499,320000.0,494.84,500.0,392.0,494.84
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,60.0,304.042,240000.0,493.42,500.0,342.0,493.42
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00006:
  agent_timesteps_total: 244000
  counters:
    num_agent_steps_sampled: 244000
    num_agent_steps_trained: 244000
    num_env_steps_sampled: 244000
    num_env_steps_trained: 244000
  custom_metrics: {}
  date: 2022-07-30_15-11-48
  done: false
  episode_len_mean: 494.96
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 494.96
  episode_reward_min: 342.0
  episodes_this_iter: 8
  episodes_total: 822
  experiment_id: 7d72bfe4c7af4135afdb29472b95b747
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.4520857632160187
          entropy_coeff: 0.0
          kl: 0.0037104319781064987
          model: {}
          policy_loss: 0.00024255886091850698
          total_loss: 1.462740182876587
          vf_explained_var: 0.33354249596595764
          vf_loss: 1.4617555141448975
        tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,81.0,311.258,324000.0,419.29,500.0,168.0,419.29
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,82.0,313.04,328000.0,485.43,500.0,238.0,485.43
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,81.0,309.521,324000.0,495.07,500.0,392.0,495.07
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,62.0,315.134,248000.0,494.95,500.0,342.0,494.95
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00001:
  agent_timesteps_total: 332000
  counters:
    num_agent_steps_sampled: 332000
    num_agent_steps_trained: 332000
    num_env_steps_sampled: 332000
    num_env_steps_trained: 332000
  custom_metrics: {}
  date: 2022-07-30_15-11-56
  done: false
  episode_len_mean: 492.33
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 492.33
  episode_reward_min: 295.0
  episodes_this_iter: 8
  episodes_total: 1021
  experiment_id: 56b0ac4ea0c44796871555a64d0eb49a
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.6653346031121838e-17
          cur_lr: 4.999999873689376e-05
          entropy: 0.28174543380737305
          entropy_coeff: 0.0
          kl: 0.0039243195205926895
          model: {}
          policy_loss: 0.001406316296197474
          total_loss: 1.432857871055603
          vf_explained_var: -0.5234137177467346
          vf_loss: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,83.0,319.152,332000.0,416.81,500.0,168.0,416.81
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,83.0,317.068,332000.0,492.33,500.0,295.0,492.33
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,82.0,313.448,328000.0,491.55,500.0,292.0,491.55
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,63.0,320.289,252000.0,495.98,500.0,342.0,495.98
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00002:
  agent_timesteps_total: 332000
  counters:
    num_agent_steps_sampled: 332000
    num_agent_steps_trained: 332000
    num_env_steps_sampled: 332000
    num_env_steps_trained: 332000
  custom_metrics: {}
  date: 2022-07-30_15-11-59
  done: false
  episode_len_mean: 493.27
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 493.27
  episode_reward_min: 292.0
  episodes_this_iter: 8
  episodes_total: 1022
  experiment_id: efef1a2bc4c54296b5efbb75abba9fd0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.0816682538902298e-18
          cur_lr: 4.999999873689376e-05
          entropy: 0.3502531945705414
          entropy_coeff: 0.0
          kl: 0.012336790561676025
          model: {}
          policy_loss: -0.0046638790518045425
          total_loss: 2.17082142829895
          vf_explained_var: 0.4116925001144409
          vf_loss: 2.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,84.0,323.096,336000.0,434.78,500.0,201.0,434.78
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,85.0,324.328,340000.0,497.62,500.0,383.0,497.62
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,84.0,320.695,336000.0,485.65,500.0,292.0,485.65
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,63.0,320.289,252000.0,495.98,500.0,342.0,495.98
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00006:
  agent_timesteps_total: 256000
  counters:
    num_agent_steps_sampled: 256000
    num_agent_steps_trained: 256000
    num_env_steps_sampled: 256000
    num_env_steps_trained: 256000
  custom_metrics: {}
  date: 2022-07-30_15-12-03
  done: false
  episode_len_mean: 493.02
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 493.02
  episode_reward_min: 342.0
  episodes_this_iter: 10
  episodes_total: 848
  experiment_id: 7d72bfe4c7af4135afdb29472b95b747
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.45252475142478943
          entropy_coeff: 0.0
          kl: 0.003788615111261606
          model: {}
          policy_loss: -0.00016472814604640007
          total_loss: 2.4251468181610107
          vf_explained_var: 0.3734685480594635
          vf_loss: 2.424553632736206
        tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,85.0,326.774,340000.0,435.82,500.0,201.0,435.82
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,86.0,328.113,344000.0,499.47,500.0,447.0,499.47
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,85.0,324.536,340000.0,483.51,500.0,292.0,483.51
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,65.0,330.544,260000.0,488.85,500.0,342.0,488.85
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00001:
  agent_timesteps_total: 348000
  counters:
    num_agent_steps_sampled: 348000
    num_agent_steps_trained: 348000
    num_env_steps_sampled: 348000
    num_env_steps_trained: 348000
  custom_metrics: {}
  date: 2022-07-30_15-12-11
  done: false
  episode_len_mean: 499.47
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 499.47
  episode_reward_min: 447.0
  episodes_this_iter: 8
  episodes_total: 1053
  experiment_id: 56b0ac4ea0c44796871555a64d0eb49a
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.0408341269451149e-18
          cur_lr: 4.999999873689376e-05
          entropy: 0.24006322026252747
          entropy_coeff: 0.0
          kl: 0.0037822083104401827
          model: {}
          policy_loss: 0.000694339512847364
          total_loss: 1.4321460723876953
          vf_explained_var: -0.5962539315223694
          vf_loss:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,87.0,334.291,348000.0,456.19,500.0,201.0,456.19
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,87.0,331.752,348000.0,499.47,500.0,447.0,499.47
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,87.0,332.242,348000.0,483.11,500.0,292.0,483.11
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,66.0,335.476,264000.0,482.15,500.0,342.0,482.15
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00006:
  agent_timesteps_total: 268000
  counters:
    num_agent_steps_sampled: 268000
    num_agent_steps_trained: 268000
    num_env_steps_sampled: 268000
    num_env_steps_trained: 268000
  custom_metrics: {}
  date: 2022-07-30_15-12-18
  done: false
  episode_len_mean: 477.13
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 477.13
  episode_reward_min: 342.0
  episodes_this_iter: 9
  episodes_total: 875
  experiment_id: 7d72bfe4c7af4135afdb29472b95b747
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.4598101079463959
          entropy_coeff: 0.0
          kl: 0.004130559507757425
          model: {}
          policy_loss: -0.0006214503082446754
          total_loss: 2.1046993732452393
          vf_explained_var: 0.4918709099292755
          vf_loss: 2.104494571685791
        train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,89.0,342.563,356000.0,460.04,500.0,234.0,460.04
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,89.0,339.854,356000.0,499.47,500.0,447.0,499.47
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,88.0,336.424,352000.0,483.11,500.0,292.0,483.11
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,67.0,340.257,268000.0,477.13,500.0,342.0,477.13
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00002:
  agent_timesteps_total: 356000
  counters:
    num_agent_steps_sampled: 356000
    num_agent_steps_trained: 356000
    num_env_steps_sampled: 356000
    num_env_steps_trained: 356000
  custom_metrics: {}
  date: 2022-07-30_15-12-22
  done: false
  episode_len_mean: 484.44
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 484.44
  episode_reward_min: 292.0
  episodes_this_iter: 8
  episodes_total: 1073
  experiment_id: efef1a2bc4c54296b5efbb75abba9fd0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.602085317362787e-19
          cur_lr: 4.999999873689376e-05
          entropy: 0.31717628240585327
          entropy_coeff: 0.0
          kl: 0.004287182353436947
          model: {}
          policy_loss: 0.0008970500202849507
          total_loss: 2.2690622806549072
          vf_explained_var: 0.05126527324318886
          vf_loss: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,90.0,346.495,360000.0,465.79,500.0,241.0,465.79
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,91.0,348.496,364000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,90.0,344.443,360000.0,484.44,500.0,292.0,484.44
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,68.0,345.986,272000.0,475.52,500.0,342.0,475.52
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 364000
  counters:
    num_agent_steps_sampled: 364000
    num_agent_steps_trained: 364000
    num_env_steps_sampled: 364000
    num_env_steps_trained: 364000
  custom_metrics: {}
  date: 2022-07-30_15-12-29
  done: false
  episode_len_mean: 471.58
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 471.58
  episode_reward_min: 249.0
  episodes_this_iter: 8
  episodes_total: 1111
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.0913936854956008e-12
          cur_lr: 4.999999873689376e-05
          entropy: 0.282288134098053
          entropy_coeff: 0.0
          kl: 0.003144259564578533
          model: {}
          policy_loss: 0.0016214852221310139
          total_loss: 1.8262183666229248
          vf_explained_var: -0.23404261469841003
          vf_loss: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,92.0,354.535,368000.0,474.69,500.0,249.0,474.69
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,92.0,352.711,368000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,91.0,348.445,364000.0,484.44,500.0,292.0,484.44
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,69.0,351.406,276000.0,469.8,500.0,342.0,469.8
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00006:
  agent_timesteps_total: 280000
  counters:
    num_agent_steps_sampled: 280000
    num_agent_steps_trained: 280000
    num_env_steps_sampled: 280000
    num_env_steps_trained: 280000
  custom_metrics: {}
  date: 2022-07-30_15-12-35
  done: false
  episode_len_mean: 465.39
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 465.39
  episode_reward_min: 342.0
  episodes_this_iter: 8
  episodes_total: 901
  experiment_id: 7d72bfe4c7af4135afdb29472b95b747
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.4861947000026703
          entropy_coeff: 0.0
          kl: 0.0017223571194335818
          model: {}
          policy_loss: 0.0011419968213886023
          total_loss: 2.2597920894622803
          vf_explained_var: 0.4734499454498291
          vf_loss: 2.258305788040161
        train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,93.0,358.546,372000.0,481.19,500.0,249.0,481.19
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,93.0,356.397,372000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,93.0,356.741,372000.0,483.18,500.0,292.0,483.18
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,70.0,356.909,280000.0,465.39,500.0,342.0,465.39
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00006:
  agent_timesteps_total: 284000
  counters:
    num_agent_steps_sampled: 284000
    num_agent_steps_trained: 284000
    num_env_steps_sampled: 284000
    num_env_steps_trained: 284000
  custom_metrics: {}
  date: 2022-07-30_15-12-41
  done: false
  episode_len_mean: 462.95
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 462.95
  episode_reward_min: 342.0
  episodes_this_iter: 9
  episodes_total: 910
  experiment_id: 7d72bfe4c7af4135afdb29472b95b747
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.4552249312400818
          entropy_coeff: 0.0
          kl: 0.002274655271321535
          model: {}
          policy_loss: 0.0015432818327099085
          total_loss: 2.5726399421691895
          vf_explained_var: 0.3950121998786926
          vf_loss: 2.5706417560577393
        train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,94.0,362.321,376000.0,487.01,500.0,296.0,487.01
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,95.0,364.655,380000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,94.0,360.786,376000.0,479.27,500.0,296.0,479.27
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,71.0,362.674,284000.0,462.95,500.0,342.0,462.95
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 380000
  counters:
    num_agent_steps_sampled: 380000
    num_agent_steps_trained: 380000
    num_env_steps_sampled: 380000
    num_env_steps_trained: 380000
  custom_metrics: {}
  date: 2022-07-30_15-12-45
  done: false
  episode_len_mean: 491.59
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 491.59
  episode_reward_min: 301.0
  episodes_this_iter: 8
  episodes_total: 1143
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 6.821210534347505e-14
          cur_lr: 4.999999873689376e-05
          entropy: 0.2972845733165741
          entropy_coeff: 0.0
          kl: 0.0035693144891411066
          model: {}
          policy_loss: 0.00017477505025453866
          total_loss: 1.6735647916793823
          vf_explained_var: -0.027975497767329216
          vf_los

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,96.0,369.986,384000.0,493.96,500.0,309.0,493.96
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,96.0,368.759,384000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,95.0,365.02,380000.0,476.26,500.0,296.0,476.26
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,72.0,368.347,288000.0,462.84,500.0,342.0,462.84
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00001:
  agent_timesteps_total: 388000
  counters:
    num_agent_steps_sampled: 388000
    num_agent_steps_trained: 388000
    num_env_steps_sampled: 388000
    num_env_steps_trained: 388000
  custom_metrics: {}
  date: 2022-07-30_15-12-52
  done: false
  episode_len_mean: 500.0
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 500.0
  episode_reward_min: 500.0
  episodes_this_iter: 8
  episodes_total: 1133
  experiment_id: 56b0ac4ea0c44796871555a64d0eb49a
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.0164395770948388e-21
          cur_lr: 4.999999873689376e-05
          entropy: 0.19474941492080688
          entropy_coeff: 0.0
          kl: 0.0018443544395267963
          model: {}
          policy_loss: 0.0019553895108401775
          total_loss: 1.4334083795547485
          vf_explained_var: -0.06451711058616638
          vf_loss:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,97.0,373.987,388000.0,493.96,500.0,309.0,493.96
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,97.0,373.04,388000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,97.0,372.916,388000.0,452.86,500.0,251.0,452.86
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,73.0,374.036,292000.0,462.46,500.0,342.0,462.46
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00006:
  agent_timesteps_total: 296000
  counters:
    num_agent_steps_sampled: 296000
    num_agent_steps_trained: 296000
    num_env_steps_sampled: 296000
    num_env_steps_trained: 296000
  custom_metrics: {}
  date: 2022-07-30_15-12-58
  done: false
  episode_len_mean: 462.46
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 462.46
  episode_reward_min: 342.0
  episodes_this_iter: 8
  episodes_total: 934
  experiment_id: 7d72bfe4c7af4135afdb29472b95b747
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.40316012501716614
          entropy_coeff: 0.0
          kl: 0.0022482662461698055
          model: {}
          policy_loss: 0.001699799089692533
          total_loss: 1.9174965620040894
          vf_explained_var: 0.5016850233078003
          vf_loss: 1.9153472185134888
        trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,98.0,377.918,392000.0,494.79,500.0,309.0,494.79
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,99.0,381.246,396000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,98.0,377.112,392000.0,447.83,500.0,251.0,447.83
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,74.0,379.221,296000.0,462.46,500.0,342.0,462.46
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 396000
  counters:
    num_agent_steps_sampled: 396000
    num_agent_steps_trained: 396000
    num_env_steps_sampled: 396000
    num_env_steps_trained: 396000
  custom_metrics: {}
  date: 2022-07-30_15-13-01
  done: false
  episode_len_mean: 496.7
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 496.7
  episode_reward_min: 309.0
  episodes_this_iter: 8
  episodes_total: 1175
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 8.526513167934381e-15
          cur_lr: 4.999999873689376e-05
          entropy: 0.33888936042785645
          entropy_coeff: 0.0
          kl: 0.005578006152063608
          model: {}
          policy_loss: 2.433638474030886e-05
          total_loss: 2.2278523445129395
          vf_explained_var: -0.1314031481742859
          vf_loss: 2.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,100.0,385.755,400000.0,497.6,500.0,397.0,497.6
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,100.0,385.144,400000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,100.0,385.351,400000.0,441.03,500.0,251.0,441.03
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,75.0,384.829,300000.0,462.95,500.0,342.0,462.95
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00006:
  agent_timesteps_total: 304000
  counters:
    num_agent_steps_sampled: 304000
    num_agent_steps_trained: 304000
    num_env_steps_sampled: 304000
    num_env_steps_trained: 304000
  custom_metrics: {}
  date: 2022-07-30_15-13-08
  done: false
  episode_len_mean: 465.72
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 465.72
  episode_reward_min: 342.0
  episodes_this_iter: 8
  episodes_total: 950
  experiment_id: 7d72bfe4c7af4135afdb29472b95b747
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.3892086446285248
          entropy_coeff: 0.0
          kl: 0.0021445711608976126
          model: {}
          policy_loss: 0.0018102193716913462
          total_loss: 1.9175848960876465
          vf_explained_var: 0.29742875695228577
          vf_loss: 1.9153456687927246
        tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,101.0,389.901,404000.0,496.59,500.0,397.0,496.59
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,102.0,393.547,408000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,101.0,389.709,404000.0,438.27,500.0,251.0,438.27
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,76.0,390.108,304000.0,465.72,500.0,342.0,465.72
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00006:
  agent_timesteps_total: 308000
  counters:
    num_agent_steps_sampled: 308000
    num_agent_steps_trained: 308000
    num_env_steps_sampled: 308000
    num_env_steps_trained: 308000
  custom_metrics: {}
  date: 2022-07-30_15-13-14
  done: false
  episode_len_mean: 470.85
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 470.85
  episode_reward_min: 342.0
  episodes_this_iter: 8
  episodes_total: 958
  experiment_id: 7d72bfe4c7af4135afdb29472b95b747
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.3738427460193634
          entropy_coeff: 0.0
          kl: 0.0018484166357666254
          model: {}
          policy_loss: 0.0017023268155753613
          total_loss: 1.9174076318740845
          vf_explained_var: 0.22071614861488342
          vf_loss: 1.9153355360031128
        tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,103.0,398.023,412000.0,484.29,500.0,277.0,484.29
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,103.0,397.677,412000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,102.0,393.644,408000.0,438.27,500.0,251.0,438.27
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,78.0,400.356,312000.0,476.46,500.0,345.0,476.46
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00002:
  agent_timesteps_total: 412000
  counters:
    num_agent_steps_sampled: 412000
    num_agent_steps_trained: 412000
    num_env_steps_sampled: 412000
    num_env_steps_trained: 412000
  custom_metrics: {}
  date: 2022-07-30_15-13-20
  done: false
  episode_len_mean: 438.11
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 438.11
  episode_reward_min: 251.0
  episodes_this_iter: 8
  episodes_total: 1197
  experiment_id: efef1a2bc4c54296b5efbb75abba9fd0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 5.082197885474194e-22
          cur_lr: 4.999999873689376e-05
          entropy: 0.3250499665737152
          entropy_coeff: 0.0
          kl: 0.003665488213300705
          model: {}
          policy_loss: -0.01106623187661171
          total_loss: 2.410830020904541
          vf_explained_var: -0.20014381408691406
          vf_loss: 2.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,104.0,401.888,416000.0,480.12,500.0,277.0,480.12
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,105.0,405.821,420000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,104.0,401.325,416000.0,437.82,500.0,251.0,437.82
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,79.0,405.361,316000.0,481.44,500.0,345.0,481.44
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 420000
  counters:
    num_agent_steps_sampled: 420000
    num_agent_steps_trained: 420000
    num_env_steps_sampled: 420000
    num_env_steps_trained: 420000
  custom_metrics: {}
  date: 2022-07-30_15-13-25
  done: false
  episode_len_mean: 480.12
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 480.12
  episode_reward_min: 277.0
  episodes_this_iter: 8
  episodes_total: 1227
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.1316282919835953e-15
          cur_lr: 4.999999873689376e-05
          entropy: 0.43371903896331787
          entropy_coeff: 0.0
          kl: 0.00270905252546072
          model: {}
          policy_loss: 0.0014970661140978336
          total_loss: 1.9470617771148682
          vf_explained_var: -0.22772277891635895
          vf_loss:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,106.0,410.003,424000.0,479.97,500.0,277.0,479.97
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,106.0,409.915,424000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,106.0,409.046,424000.0,439.02,500.0,251.0,439.02
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,80.0,410.744,320000.0,485.74,500.0,373.0,485.74
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00001:
  agent_timesteps_total: 428000
  counters:
    num_agent_steps_sampled: 428000
    num_agent_steps_trained: 428000
    num_env_steps_sampled: 428000
    num_env_steps_trained: 428000
  custom_metrics: {}
  date: 2022-07-30_15-13-33
  done: false
  episode_len_mean: 500.0
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 500.0
  episode_reward_min: 500.0
  episodes_this_iter: 8
  episodes_total: 1213
  experiment_id: 56b0ac4ea0c44796871555a64d0eb49a
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 9.926167745066785e-25
          cur_lr: 4.999999873689376e-05
          entropy: 0.1956542730331421
          entropy_coeff: 0.0
          kl: 0.0030686885584145784
          model: {}
          policy_loss: 0.0018784734420478344
          total_loss: 1.4333306550979614
          vf_explained_var: -0.37367019057273865
          vf_loss: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,107.0,413.776,428000.0,480.57,500.0,277.0,480.57
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,108.0,417.34,432000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,107.0,412.578,428000.0,444.18,500.0,251.0,444.18
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,81.0,415.686,324000.0,490.71,500.0,373.0,490.71
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00006:
  agent_timesteps_total: 328000
  counters:
    num_agent_steps_sampled: 328000
    num_agent_steps_trained: 328000
    num_env_steps_sampled: 328000
    num_env_steps_trained: 328000
  custom_metrics: {}
  date: 2022-07-30_15-13-39
  done: false
  episode_len_mean: 494.68
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 494.68
  episode_reward_min: 373.0
  episodes_this_iter: 8
  episodes_total: 998
  experiment_id: 7d72bfe4c7af4135afdb29472b95b747
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.38042891025543213
          entropy_coeff: 0.0
          kl: 0.0033241338096559048
          model: {}
          policy_loss: 0.00037503166822716594
          total_loss: 1.9163683652877808
          vf_explained_var: 0.13230553269386292
          vf_loss: 1.9153283834457397
        t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,109.0,421.338,436000.0,480.57,500.0,277.0,480.57
PPOTrainer_CartPole-v1_c1a07_00001,RUNNING,192.168.1.85:44450,109.0,420.986,436000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,109.0,420.147,436000.0,462.84,500.0,295.0,462.84
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,82.0,420.716,328000.0,494.68,500.0,373.0,494.68
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,


Result for PPOTrainer_CartPole-v1_c1a07_00006:
  agent_timesteps_total: 332000
  counters:
    num_agent_steps_sampled: 332000
    num_agent_steps_trained: 332000
    num_env_steps_sampled: 332000
    num_env_steps_trained: 332000
  custom_metrics: {}
  date: 2022-07-30_15-13-44
  done: false
  episode_len_mean: 497.55
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 497.55
  episode_reward_min: 377.0
  episodes_this_iter: 8
  episodes_total: 1006
  experiment_id: 7d72bfe4c7af4135afdb29472b95b747
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.3924269378185272
          entropy_coeff: 0.0
          kl: 0.0033260940108448267
          model: {}
          policy_loss: -0.00020911604224238545
          total_loss: 1.9157824516296387
          vf_explained_var: 0.10181746631860733
          vf_loss: 1.9153263568878174
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,110.0,425.451,440000.0,480.57,500.0,277.0,480.57
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,110.0,424.3,440000.0,469.39,500.0,295.0,469.39
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,83.0,425.869,332000.0,497.55,500.0,377.0,497.55
PPOTrainer_CartPole-v1_c1a07_00003,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 444000
  counters:
    num_agent_steps_sampled: 444000
    num_agent_steps_trained: 444000
    num_env_steps_sampled: 444000
    num_env_steps_trained: 444000
  custom_metrics: {}
  date: 2022-07-30_15-13-49
  done: false
  episode_len_mean: 476.52
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 476.52
  episode_reward_min: 277.0
  episodes_this_iter: 8
  episodes_total: 1275
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 5.329070729958988e-16
          cur_lr: 4.999999873689376e-05
          entropy: 0.43643373250961304
          entropy_coeff: 0.0
          kl: 0.005844387225806713
          model: {}
          policy_loss: -0.0007259220001287758
          total_loss: 2.3581466674804688
          vf_explained_var: -0.3385825753211975
          vf_loss:

[2m[33m(raylet)[0m bash: /home/ibraheem/miniconda3/envs/tf/lib/libtinfo.so.6: no version information available (required by bash)
[2m[36m(PPOTrainer pid=46041)[0m 2022-07-30 15:13:55,417	INFO trainer.py:2332 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
[2m[36m(PPOTrainer pid=46041)[0m 2022-07-30 15:13:55,418	INFO ppo.py:414 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(PPOTrainer pid=46041)[0m 2022-07-30 15:13:55,419	INFO trainer.py:903 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[33m(raylet)[0m bash: /home/ibraheem/miniconda3/envs/tf/lib/libtinfo.so.6: no version informatio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,112.0,433.45,448000.0,477.27,500.0,277.0,477.27
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,112.0,431.512,448000.0,470.83,500.0,295.0,470.83
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,84.0,430.949,336000.0,499.2,500.0,431.0,499.2
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0


Result for PPOTrainer_CartPole-v1_c1a07_00002:
  agent_timesteps_total: 452000
  counters:
    num_agent_steps_sampled: 452000
    num_agent_steps_trained: 452000
    num_env_steps_sampled: 452000
    num_env_steps_trained: 452000
  custom_metrics: {}
  date: 2022-07-30_15-13-58
  done: false
  episode_len_mean: 471.57
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 471.57
  episode_reward_min: 310.0
  episodes_this_iter: 8
  episodes_total: 1282
  experiment_id: efef1a2bc4c54296b5efbb75abba9fd0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 3.176373678421371e-23
          cur_lr: 4.999999873689376e-05
          entropy: 0.3444056808948517
          entropy_coeff: 0.0
          kl: 0.0029593282379209995
          model: {}
          policy_loss: -0.0009761955006979406
          total_loss: 2.1310980319976807
          vf_explained_var: 0.04992324486374855
          vf_loss:



Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,112.0,433.45,448000.0,477.27,500.0,277.0,477.27
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,113.0,434.958,452000.0,471.57,500.0,310.0,471.57
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,84.0,430.949,336000.0,499.2,500.0,431.0,499.2
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 452000
  counters:
    num_agent_steps_sampled: 452000
    num_agent_steps_trained: 452000
    num_env_steps_sampled: 452000
    num_env_steps_trained: 452000
  custom_metrics: {}
  date: 2022-07-30_15-13-57
  done: false
  episode_len_mean: 478.77
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 478.77
  episode_reward_min: 277.0
  episodes_this_iter: 8
  episodes_total: 1292
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.664535364979494e-16
          cur_lr: 4.999999873689376e-05
          entropy: 0.42568767070770264
          entropy_coeff: 0.0
          kl: 0.010030900128185749
          model: {}
          policy_loss: -0.0018703796667978168
          total_loss: 2.1957194805145264
          vf_explained_var: 1.426409653504379e-06
          vf_los



Result for PPOTrainer_CartPole-v1_c1a07_00003:
  agent_timesteps_total: 4000
  counters:
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_env_steps_sampled: 4000
    num_env_steps_trained: 4000
  custom_metrics: {}
  date: 2022-07-30_15-14-04
  done: false
  episode_len_mean: 23.03488372093023
  episode_media: {}
  episode_reward_max: 108.0
  episode_reward_mean: 23.03488372093023
  episode_reward_min: 8.0
  episodes_this_iter: 172
  episodes_total: 172
  experiment_id: ad66a076bc0a453983e0662b54f88cdb
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.6673904061317444
          entropy_coeff: 0.0
          kl: 0.026295851916074753
          model: {}
          policy_loss: -0.029459834098815918
          total_loss: 8.895322799682617
          vf_explained_var: 0.004959145560860634
          v

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,114.0,440.991,456000.0,482.13,500.0,335.0,482.13
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,114.0,439.141,456000.0,471.36,500.0,310.0,471.36
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,1.0,4.40912,4000.0,23.0349,108.0,8.0,23.0349
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,86.0,441.197,344000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 460000
  counters:
    num_agent_steps_sampled: 460000
    num_agent_steps_trained: 460000
    num_env_steps_sampled: 460000
    num_env_steps_trained: 460000
  custom_metrics: {}
  date: 2022-07-30_15-14-08
  done: false
  episode_len_mean: 483.95
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 483.95
  episode_reward_min: 335.0
  episodes_this_iter: 8
  episodes_total: 1308
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.664535364979494e-16
          cur_lr: 4.999999873689376e-05
          entropy: 0.4240241050720215
          entropy_coeff: 0.0
          kl: 0.0033944265451282263
          model: {}
          policy_loss: -0.0005409028963185847
          total_loss: 1.6526857614517212
          vf_explained_var: 0.0001967236603377387
          vf_los

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,115.0,444.94,460000.0,483.95,500.0,335.0,483.95
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,115.0,443.201,460000.0,468.72,500.0,310.0,468.72
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,2.0,8.54885,8000.0,42.5,129.0,8.0,42.5
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,87.0,446.745,348000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0


Result for PPOTrainer_CartPole-v1_c1a07_00003:
  agent_timesteps_total: 12000
  counters:
    num_agent_steps_sampled: 12000
    num_agent_steps_trained: 12000
    num_env_steps_sampled: 12000
    num_env_steps_trained: 12000
  custom_metrics: {}
  date: 2022-07-30_15-14-13
  done: false
  episode_len_mean: 68.79
  episode_media: {}
  episode_reward_max: 406.0
  episode_reward_mean: 68.79
  episode_reward_min: 8.0
  episodes_this_iter: 35
  episodes_total: 299
  experiment_id: ad66a076bc0a453983e0662b54f88cdb
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.30000001192092896
          cur_lr: 4.999999873689376e-05
          entropy: 0.5737878680229187
          entropy_coeff: 0.0
          kl: 0.010294968262314796
          model: {}
          policy_loss: -0.021866288036108017
          total_loss: 9.229126930236816
          vf_explained_var: 0.08254499733448029
          vf_loss: 9.24790382385

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,116.0,449.158,464000.0,483.03,500.0,332.0,483.03
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,116.0,447.427,464000.0,462.25,500.0,218.0,462.25
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,3.0,12.5298,12000.0,68.79,406.0,8.0,68.79
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,88.0,452.033,352000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 468000
  counters:
    num_agent_steps_sampled: 468000
    num_agent_steps_trained: 468000
    num_env_steps_sampled: 468000
    num_env_steps_trained: 468000
  custom_metrics: {}
  date: 2022-07-30_15-14-16
  done: false
  episode_len_mean: 480.69
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 480.69
  episode_reward_min: 332.0
  episodes_this_iter: 8
  episodes_total: 1326
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.332267682489747e-16
          cur_lr: 4.999999873689376e-05
          entropy: 0.44742169976234436
          entropy_coeff: 0.0
          kl: 0.005894774105399847
          model: {}
          policy_loss: -0.0007556246127933264
          total_loss: 2.320317268371582
          vf_explained_var: -0.3560563623905182
          vf_loss: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,118.0,457.323,472000.0,480.84,500.0,332.0,480.84
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,118.0,455.491,472000.0,467.36,500.0,218.0,467.36
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,5.0,20.7089,20000.0,131.58,500.0,8.0,131.58
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,89.0,457.407,356000.0,498.54,500.0,421.0,498.54
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0


Result for PPOTrainer_CartPole-v1_c1a07_00002:
  agent_timesteps_total: 476000
  counters:
    num_agent_steps_sampled: 476000
    num_agent_steps_trained: 476000
    num_env_steps_sampled: 476000
    num_env_steps_trained: 476000
  custom_metrics: {}
  date: 2022-07-30_15-14-25
  done: false
  episode_len_mean: 471.64
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 471.64
  episode_reward_min: 218.0
  episodes_this_iter: 8
  episodes_total: 1334
  experiment_id: efef1a2bc4c54296b5efbb75abba9fd0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 3.970467098026714e-24
          cur_lr: 4.999999873689376e-05
          entropy: 0.3642273247241974
          entropy_coeff: 0.0
          kl: 0.001997249899432063
          model: {}
          policy_loss: 0.0009720443631522357
          total_loss: 1.750186800956726
          vf_explained_var: -0.143536776304245
          vf_loss: 1.7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,119.0,461.602,476000.0,480.84,500.0,332.0,480.84
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,119.0,459.629,476000.0,471.64,500.0,218.0,471.64
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,6.0,24.9108,24000.0,164.97,500.0,8.0,164.97
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,90.0,462.858,360000.0,497.63,500.0,409.0,497.63
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0


Result for PPOTrainer_CartPole-v1_c1a07_00003:
  agent_timesteps_total: 28000
  counters:
    num_agent_steps_sampled: 28000
    num_agent_steps_trained: 28000
    num_env_steps_sampled: 28000
    num_env_steps_trained: 28000
  custom_metrics: {}
  date: 2022-07-30_15-14-29
  done: false
  episode_len_mean: 201.01
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 201.01
  episode_reward_min: 8.0
  episodes_this_iter: 10
  episodes_total: 355
  experiment_id: ad66a076bc0a453983e0662b54f88cdb
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.15000000596046448
          cur_lr: 4.999999873689376e-05
          entropy: 0.5641421675682068
          entropy_coeff: 0.0
          kl: 0.004829449579119682
          model: {}
          policy_loss: -0.012733034789562225
          total_loss: 9.618794441223145
          vf_explained_var: 0.0060750944539904594
          vf_loss: 9.6308031

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,120.0,465.607,480000.0,480.84,500.0,332.0,480.84
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,120.0,463.575,480000.0,478.09,500.0,218.0,478.09
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,7.0,28.9686,28000.0,201.01,500.0,8.0,201.01
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,91.0,467.955,364000.0,496.36,500.0,409.0,496.36
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0


Result for PPOTrainer_CartPole-v1_c1a07_00002:
  agent_timesteps_total: 484000
  counters:
    num_agent_steps_sampled: 484000
    num_agent_steps_trained: 484000
    num_env_steps_sampled: 484000
    num_env_steps_trained: 484000
  custom_metrics: {}
  date: 2022-07-30_15-14-33
  done: false
  episode_len_mean: 478.35
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 478.35
  episode_reward_min: 218.0
  episodes_this_iter: 8
  episodes_total: 1350
  experiment_id: efef1a2bc4c54296b5efbb75abba9fd0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 9.926167745066785e-25
          cur_lr: 4.999999873689376e-05
          entropy: 0.3274685740470886
          entropy_coeff: 0.0
          kl: 0.004980407189577818
          model: {}
          policy_loss: 0.0011021422687917948
          total_loss: 1.9777692556381226
          vf_explained_var: -0.41833287477493286
          vf_loss: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,122.0,473.447,488000.0,479.25,500.0,332.0,479.25
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,122.0,471.093,488000.0,478.52,500.0,218.0,478.52
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,9.0,36.976,36000.0,263.86,500.0,23.0,263.86
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,92.0,473.283,368000.0,494.89,500.0,409.0,494.89
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0


Result for PPOTrainer_CartPole-v1_c1a07_00002:
  agent_timesteps_total: 492000
  counters:
    num_agent_steps_sampled: 492000
    num_agent_steps_trained: 492000
    num_env_steps_sampled: 492000
    num_env_steps_trained: 492000
  custom_metrics: {}
  date: 2022-07-30_15-14-40
  done: false
  episode_len_mean: 480.24
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 480.24
  episode_reward_min: 218.0
  episodes_this_iter: 8
  episodes_total: 1366
  experiment_id: efef1a2bc4c54296b5efbb75abba9fd0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 4.963083872533392e-25
          cur_lr: 4.999999873689376e-05
          entropy: 0.34596818685531616
          entropy_coeff: 0.0
          kl: 0.0022580556105822325
          model: {}
          policy_loss: 0.0023579781409353018
          total_loss: 2.8048017024993896
          vf_explained_var: -0.14289842545986176
          vf_loss

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,123.0,477.614,492000.0,482.24,500.0,332.0,482.24
PPOTrainer_CartPole-v1_c1a07_00002,RUNNING,192.168.1.85:44601,124.0,478.784,496000.0,480.67,500.0,218.0,480.67
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,10.0,41.0125,40000.0,294.34,500.0,23.0,294.34
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,93.0,478.304,372000.0,492.71,500.0,347.0,492.71
PPOTrainer_CartPole-v1_c1a07_00004,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0


Result for PPOTrainer_CartPole-v1_c1a07_00003:
  agent_timesteps_total: 44000
  counters:
    num_agent_steps_sampled: 44000
    num_agent_steps_trained: 44000
    num_env_steps_sampled: 44000
    num_env_steps_trained: 44000
  custom_metrics: {}
  date: 2022-07-30_15-14-45
  done: false
  episode_len_mean: 318.51
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 318.51
  episode_reward_min: 23.0
  episodes_this_iter: 10
  episodes_total: 395
  experiment_id: ad66a076bc0a453983e0662b54f88cdb
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.03750000149011612
          cur_lr: 4.999999873689376e-05
          entropy: 0.5575287938117981
          entropy_coeff: 0.0
          kl: 0.0032984239514917135
          model: {}
          policy_loss: -0.007638729643076658
          total_loss: 9.468690872192383
          vf_explained_var: 0.11447702348232269
          vf_loss: 9.4762058

[2m[33m(raylet)[0m bash: /home/ibraheem/miniconda3/envs/tf/lib/libtinfo.so.6: no version information available (required by bash)
[2m[36m(PPOTrainer pid=46303)[0m 2022-07-30 15:14:54,068	INFO trainer.py:2332 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
[2m[36m(PPOTrainer pid=46303)[0m 2022-07-30 15:14:54,068	INFO ppo.py:414 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(PPOTrainer pid=46303)[0m 2022-07-30 15:14:54,068	INFO trainer.py:903 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[33m(raylet)[0m bash: /home/ibraheem/miniconda3/envs/tf/lib/libtinfo.so.6: no version informatio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,125.0,485.441,500000.0,483.79,500.0,332.0,483.79
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,12.0,49.1647,48000.0,343.31,500.0,27.0,343.31
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,94.0,483.654,376000.0,491.35,500.0,347.0,491.35
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 504000
  counters:
    num_agent_steps_sampled: 504000
    num_agent_steps_trained: 504000
    num_env_steps_sampled: 504000
    num_env_steps_trained: 504000
  custom_metrics: {}
  date: 2022-07-30_15-14-53
  done: false
  episode_len_mean: 485.91
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 485.91
  episode_reward_min: 332.0
  episodes_this_iter: 8
  episodes_total: 1399
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.6653346031121838e-17
          cur_lr: 4.999999873689376e-05
          entropy: 0.4362838864326477
          entropy_coeff: 0.0
          kl: 0.00587648106738925
          model: {}
          policy_loss: 0.0012929145013913512
          total_loss: 1.6217602491378784
          vf_explained_var: -9.887532360153273e-05
          vf_loss

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,126.0,489.101,504000.0,485.91,500.0,332.0,485.91
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,12.0,49.1647,48000.0,343.31,500.0,27.0,343.31
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,94.0,483.654,376000.0,491.35,500.0,347.0,491.35
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14


Result for PPOTrainer_CartPole-v1_c1a07_00003:
  agent_timesteps_total: 52000
  counters:
    num_agent_steps_sampled: 52000
    num_agent_steps_trained: 52000
    num_env_steps_sampled: 52000
    num_env_steps_trained: 52000
  custom_metrics: {}
  date: 2022-07-30_15-14-53
  done: false
  episode_len_mean: 368.97
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 368.97
  episode_reward_min: 27.0
  episodes_this_iter: 9
  episodes_total: 414
  experiment_id: ad66a076bc0a453983e0662b54f88cdb
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.01875000074505806
          cur_lr: 4.999999873689376e-05
          entropy: 0.5432854294776917
          entropy_coeff: 0.0
          kl: 0.007228154223412275
          model: {}
          policy_loss: -0.010493657551705837
          total_loss: 9.311995506286621
          vf_explained_var: 0.17254352569580078
          vf_loss: 9.322353363



Result for PPOTrainer_CartPole-v1_c1a07_00004:
  agent_timesteps_total: 4000
  counters:
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_env_steps_sampled: 4000
    num_env_steps_trained: 4000
  custom_metrics: {}
  date: 2022-07-30_15-15-02
  done: false
  episode_len_mean: 22.044198895027623
  episode_media: {}
  episode_reward_max: 68.0
  episode_reward_mean: 22.044198895027623
  episode_reward_min: 9.0
  episodes_this_iter: 181
  episodes_total: 181
  experiment_id: af77bd27bdfc4736a15cb677c14a8eb0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.6660923957824707
          entropy_coeff: 0.0
          kl: 0.028248082846403122
          model: {}
          policy_loss: -0.04262335225939751
          total_loss: 8.799177169799805
          vf_explained_var: 0.008301086723804474
          v

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,127.0,493.334,508000.0,480.45,500.0,252.0,480.45
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,14.0,56.9872,56000.0,387.8,500.0,33.0,387.8
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,1.0,4.11042,4000.0,22.0442,68.0,9.0,22.0442
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,96.0,493.464,384000.0,482.42,500.0,166.0,482.42
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 512000
  counters:
    num_agent_steps_sampled: 512000
    num_agent_steps_trained: 512000
    num_env_steps_sampled: 512000
    num_env_steps_trained: 512000
  custom_metrics: {}
  date: 2022-07-30_15-15-06
  done: false
  episode_len_mean: 479.28
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 479.28
  episode_reward_min: 249.0
  episodes_this_iter: 10
  episodes_total: 1418
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.6653346031121838e-17
          cur_lr: 4.999999873689376e-05
          entropy: 0.4570953845977783
          entropy_coeff: 0.0
          kl: 0.006027149967849255
          model: {}
          policy_loss: 0.00046269508311524987
          total_loss: 2.492903232574463
          vf_explained_var: -0.004775377456098795
          vf_los

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,128.0,496.923,512000.0,479.28,500.0,249.0,479.28
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,15.0,61.1312,60000.0,401.96,500.0,33.0,401.96
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,2.0,8.20755,8000.0,41.59,151.0,10.0,41.59
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,97.0,499.218,388000.0,479.79,500.0,166.0,479.79
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14


Result for PPOTrainer_CartPole-v1_c1a07_00004:
  agent_timesteps_total: 12000
  counters:
    num_agent_steps_sampled: 12000
    num_agent_steps_trained: 12000
    num_env_steps_sampled: 12000
    num_env_steps_trained: 12000
  custom_metrics: {}
  date: 2022-07-30_15-15-10
  done: false
  episode_len_mean: 64.08
  episode_media: {}
  episode_reward_max: 274.0
  episode_reward_mean: 64.08
  episode_reward_min: 12.0
  episodes_this_iter: 44
  episodes_total: 315
  experiment_id: af77bd27bdfc4736a15cb677c14a8eb0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.30000001192092896
          cur_lr: 4.999999873689376e-05
          entropy: 0.5673825740814209
          entropy_coeff: 0.0
          kl: 0.010769584216177464
          model: {}
          policy_loss: -0.025372421368956566
          total_loss: 9.05777645111084
          vf_explained_var: 0.12033074349164963
          vf_loss: 9.07991981506

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,130.0,505.142,520000.0,480.59,500.0,249.0,480.59
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,16.0,65.5089,64000.0,416.02,500.0,33.0,416.02
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,3.0,12.3767,12000.0,64.08,274.0,12.0,64.08
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,97.0,499.218,388000.0,479.79,500.0,166.0,479.79
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14


Result for PPOTrainer_CartPole-v1_c1a07_00006:
  agent_timesteps_total: 392000
  counters:
    num_agent_steps_sampled: 392000
    num_agent_steps_trained: 392000
    num_env_steps_sampled: 392000
    num_env_steps_trained: 392000
  custom_metrics: {}
  date: 2022-07-30_15-15-15
  done: false
  episode_len_mean: 477.75
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 477.75
  episode_reward_min: 166.0
  episodes_this_iter: 9
  episodes_total: 1131
  experiment_id: 7d72bfe4c7af4135afdb29472b95b747
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.42056936025619507
          entropy_coeff: 0.0
          kl: 0.004777511116117239
          model: {}
          policy_loss: -0.00041447675903327763
          total_loss: 2.1553540229797363
          vf_explained_var: 0.1848694235086441
          vf_loss: 2.154813051223755
        tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,131.0,509.727,524000.0,480.59,500.0,249.0,480.59
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,18.0,74.1476,72000.0,420.98,500.0,33.0,420.98
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,5.0,20.7711,20000.0,125.84,500.0,16.0,125.84
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,98.0,504.598,392000.0,477.75,500.0,166.0,477.75
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14


Result for PPOTrainer_CartPole-v1_c1a07_00006:
  agent_timesteps_total: 396000
  counters:
    num_agent_steps_sampled: 396000
    num_agent_steps_trained: 396000
    num_env_steps_sampled: 396000
    num_env_steps_trained: 396000
  custom_metrics: {}
  date: 2022-07-30_15-15-20
  done: false
  episode_len_mean: 477.33
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 477.33
  episode_reward_min: 166.0
  episodes_this_iter: 8
  episodes_total: 1139
  experiment_id: 7d72bfe4c7af4135afdb29472b95b747
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.41210034489631653
          entropy_coeff: 0.0
          kl: 0.0023307285737246275
          model: {}
          policy_loss: 0.0005394835607148707
          total_loss: 1.3972430229187012
          vf_explained_var: 0.22862716019153595
          vf_loss: 1.3962374925613403
        t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,132.0,513.714,528000.0,480.59,500.0,249.0,480.59
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,19.0,78.115,76000.0,418.13,500.0,33.0,418.13
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,6.0,24.7481,24000.0,158.73,500.0,16.0,158.73
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,100.0,515.436,400000.0,477.26,500.0,166.0,477.26
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14


Result for PPOTrainer_CartPole-v1_c1a07_00004:
  agent_timesteps_total: 28000
  counters:
    num_agent_steps_sampled: 28000
    num_agent_steps_trained: 28000
    num_env_steps_sampled: 28000
    num_env_steps_trained: 28000
  custom_metrics: {}
  date: 2022-07-30_15-15-27
  done: false
  episode_len_mean: 191.62
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 191.62
  episode_reward_min: 16.0
  episodes_this_iter: 9
  episodes_total: 376
  experiment_id: af77bd27bdfc4736a15cb677c14a8eb0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.15000000596046448
          cur_lr: 4.999999873689376e-05
          entropy: 0.5536090731620789
          entropy_coeff: 0.0
          kl: 0.006604771129786968
          model: {}
          policy_loss: -0.02039823681116104
          total_loss: 9.582195281982422
          vf_explained_var: 0.0771235004067421
          vf_loss: 9.60160255432

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,134.0,521.749,536000.0,476.65,500.0,249.0,476.65
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,20.0,82.0571,80000.0,425.26,500.0,33.0,425.26
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,8.0,32.2008,32000.0,228.35,500.0,16.0,228.35
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,101.0,520.482,404000.0,478.27,500.0,166.0,478.27
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14


Result for PPOTrainer_CartPole-v1_c1a07_00003:
  agent_timesteps_total: 84000
  counters:
    num_agent_steps_sampled: 84000
    num_agent_steps_trained: 84000
    num_env_steps_sampled: 84000
    num_env_steps_trained: 84000
  custom_metrics: {}
  date: 2022-07-30_15-15-31
  done: false
  episode_len_mean: 438.8
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 438.8
  episode_reward_min: 33.0
  episodes_this_iter: 9
  episodes_total: 484
  experiment_id: ad66a076bc0a453983e0662b54f88cdb
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.004687500186264515
          cur_lr: 4.999999873689376e-05
          entropy: 0.5060914158821106
          entropy_coeff: 0.0
          kl: 0.00579292606562376
          model: {}
          policy_loss: -0.0021653082221746445
          total_loss: 8.467926025390625
          vf_explained_var: -0.01170338038355112
          vf_loss: 8.470063209

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,136.0,528.98,544000.0,464.26,500.0,216.0,464.26
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,22.0,89.3765,88000.0,451.71,500.0,155.0,451.71
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,9.0,36.0232,36000.0,256.01,500.0,16.0,256.01
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,102.0,525.594,408000.0,479.18,500.0,166.0,479.18
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14


Result for PPOTrainer_CartPole-v1_c1a07_00003:
  agent_timesteps_total: 92000
  counters:
    num_agent_steps_sampled: 92000
    num_agent_steps_trained: 92000
    num_env_steps_sampled: 92000
    num_env_steps_trained: 92000
  custom_metrics: {}
  date: 2022-07-30_15-15-39
  done: false
  episode_len_mean: 455.12
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 455.12
  episode_reward_min: 155.0
  episodes_this_iter: 9
  episodes_total: 501
  experiment_id: ad66a076bc0a453983e0662b54f88cdb
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.004687500186264515
          cur_lr: 4.999999873689376e-05
          entropy: 0.5386260747909546
          entropy_coeff: 0.0
          kl: 0.009605598635971546
          model: {}
          policy_loss: -0.0045049916952848434
          total_loss: 2.8675360679626465
          vf_explained_var: -0.014868217520415783
          vf_loss: 2.871

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,137.0,532.617,548000.0,464.26,500.0,216.0,464.26
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,24.0,96.9942,96000.0,461.71,500.0,197.0,461.71
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,12.0,47.1046,48000.0,351.8,500.0,39.0,351.8
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,103.0,530.765,412000.0,480.45,500.0,166.0,480.45
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 552000
  counters:
    num_agent_steps_sampled: 552000
    num_agent_steps_trained: 552000
    num_env_steps_sampled: 552000
    num_env_steps_trained: 552000
  custom_metrics: {}
  date: 2022-07-30_15-15-46
  done: false
  episode_len_mean: 463.04
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 463.04
  episode_reward_min: 216.0
  episodes_this_iter: 9
  episodes_total: 1503
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 4.1633365077804595e-18
          cur_lr: 4.999999873689376e-05
          entropy: 0.4658965468406677
          entropy_coeff: 0.0
          kl: 0.008118954487144947
          model: {}
          policy_loss: -0.0037018093280494213
          total_loss: 2.826439619064331
          vf_explained_var: -0.45652174949645996
          vf_loss:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,139.0,540.606,556000.0,469.04,500.0,216.0,469.04
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,26.0,104.641,104000.0,467.86,500.0,197.0,467.86
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,13.0,50.816,52000.0,382.94,500.0,39.0,382.94
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,105.0,541.008,420000.0,484.1,500.0,166.0,484.1
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 560000
  counters:
    num_agent_steps_sampled: 560000
    num_agent_steps_trained: 560000
    num_env_steps_sampled: 560000
    num_env_steps_trained: 560000
  custom_metrics: {}
  date: 2022-07-30_15-15-54
  done: false
  episode_len_mean: 472.46
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 472.46
  episode_reward_min: 216.0
  episodes_this_iter: 8
  episodes_total: 1519
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 4.1633365077804595e-18
          cur_lr: 4.999999873689376e-05
          entropy: 0.4426959156990051
          entropy_coeff: 0.0
          kl: 0.0077547128312289715
          model: {}
          policy_loss: -0.0004216160741634667
          total_loss: 2.368533134460449
          vf_explained_var: 0.001044427277520299
          vf_loss

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,140.0,544.701,560000.0,472.46,500.0,216.0,472.46
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,27.0,108.952,108000.0,465.96,500.0,197.0,465.96
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,14.0,55.04,56000.0,407.68,500.0,39.0,407.68
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,106.0,546.397,424000.0,485.46,500.0,166.0,485.46
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14


Result for PPOTrainer_CartPole-v1_c1a07_00004:
  agent_timesteps_total: 60000
  counters:
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_env_steps_sampled: 60000
    num_env_steps_trained: 60000
  custom_metrics: {}
  date: 2022-07-30_15-15-58
  done: false
  episode_len_mean: 430.66
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 430.66
  episode_reward_min: 39.0
  episodes_this_iter: 8
  episodes_total: 443
  experiment_id: af77bd27bdfc4736a15cb677c14a8eb0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.004687500186264515
          cur_lr: 4.999999873689376e-05
          entropy: 0.514772355556488
          entropy_coeff: 0.0
          kl: 0.010944927111268044
          model: {}
          policy_loss: -0.011107481084764004
          total_loss: 9.374533653259277
          vf_explained_var: 0.0011232675751671195
          vf_loss: 9.3855886

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,141.0,549.244,564000.0,458.56,500.0,175.0,458.56
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,28.0,113.749,112000.0,468.3,500.0,197.0,468.3
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,16.0,63.6504,64000.0,451.01,500.0,39.0,451.01
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,106.0,546.397,424000.0,485.46,500.0,166.0,485.46
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14


Result for PPOTrainer_CartPole-v1_c1a07_00006:
  agent_timesteps_total: 428000
  counters:
    num_agent_steps_sampled: 428000
    num_agent_steps_trained: 428000
    num_env_steps_sampled: 428000
    num_env_steps_trained: 428000
  custom_metrics: {}
  date: 2022-07-30_15-16-02
  done: false
  episode_len_mean: 488.29
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 488.29
  episode_reward_min: 166.0
  episodes_this_iter: 8
  episodes_total: 1203
  experiment_id: 7d72bfe4c7af4135afdb29472b95b747
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.3505473732948303
          entropy_coeff: 0.0
          kl: 0.0017008811701089144
          model: {}
          policy_loss: 0.0024642108473926783
          total_loss: 2.7346720695495605
          vf_explained_var: 0.36490604281425476
          vf_loss: 2.731868028640747
        tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,143.0,557.789,572000.0,439.61,500.0,175.0,439.61
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,29.0,117.72,116000.0,477.13,500.0,216.0,477.13
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,17.0,67.762,68000.0,464.23,500.0,39.0,464.23
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,107.0,551.911,428000.0,488.29,500.0,166.0,488.29
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14


Result for PPOTrainer_CartPole-v1_c1a07_00006:
  agent_timesteps_total: 432000
  counters:
    num_agent_steps_sampled: 432000
    num_agent_steps_trained: 432000
    num_env_steps_sampled: 432000
    num_env_steps_trained: 432000
  custom_metrics: {}
  date: 2022-07-30_15-16-08
  done: false
  episode_len_mean: 492.93
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 492.93
  episode_reward_min: 354.0
  episodes_this_iter: 8
  episodes_total: 1211
  experiment_id: 7d72bfe4c7af4135afdb29472b95b747
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.3788769841194153
          entropy_coeff: 0.0
          kl: 0.0028806172776967287
          model: {}
          policy_loss: 0.0015059217112138867
          total_loss: 2.7339413166046143
          vf_explained_var: 0.36175745725631714
          vf_loss: 2.731858968734741
        tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,144.0,562.02,576000.0,434.83,500.0,175.0,434.83
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,31.0,125.764,124000.0,445.37,500.0,175.0,445.37
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,18.0,71.7777,72000.0,481.8,500.0,133.0,481.8
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,109.0,562.471,436000.0,496.49,500.0,408.0,496.49
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14


Result for PPOTrainer_CartPole-v1_c1a07_00004:
  agent_timesteps_total: 76000
  counters:
    num_agent_steps_sampled: 76000
    num_agent_steps_trained: 76000
    num_env_steps_sampled: 76000
    num_env_steps_trained: 76000
  custom_metrics: {}
  date: 2022-07-30_15-16-15
  done: false
  episode_len_mean: 486.53
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 486.53
  episode_reward_min: 133.0
  episodes_this_iter: 8
  episodes_total: 475
  experiment_id: af77bd27bdfc4736a15cb677c14a8eb0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0011718750465661287
          cur_lr: 4.999999873689376e-05
          entropy: 0.5406368970870972
          entropy_coeff: 0.0
          kl: 0.005661835428327322
          model: {}
          policy_loss: -0.006053644698113203
          total_loss: 9.04210376739502
          vf_explained_var: 0.012216789647936821
          vf_loss: 9.048150

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,145.0,566.127,580000.0,431.91,500.0,175.0,431.91
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,32.0,130.03,128000.0,437.13,500.0,147.0,437.13
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,19.0,76.0573,76000.0,486.53,500.0,133.0,486.53
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,110.0,567.779,440000.0,497.88,500.0,408.0,497.88
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 584000
  counters:
    num_agent_steps_sampled: 584000
    num_agent_steps_trained: 584000
    num_env_steps_sampled: 584000
    num_env_steps_trained: 584000
  custom_metrics: {}
  date: 2022-07-30_15-16-20
  done: false
  episode_len_mean: 437.77
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 437.77
  episode_reward_min: 175.0
  episodes_this_iter: 9
  episodes_total: 1578
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 4.1633365077804595e-18
          cur_lr: 4.999999873689376e-05
          entropy: 0.4513374865055084
          entropy_coeff: 0.0
          kl: 0.007391041610389948
          model: {}
          policy_loss: 0.00035319404560141265
          total_loss: 2.142491579055786
          vf_explained_var: 0.0003947177901864052
          vf_loss

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,146.0,570.21,584000.0,437.77,500.0,175.0,437.77
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,33.0,134.019,132000.0,437.13,500.0,147.0,437.13
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,21.0,83.865,84000.0,492.56,500.0,326.0,492.56
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,111.0,573.034,444000.0,496.66,500.0,352.0,496.66
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 592000
  counters:
    num_agent_steps_sampled: 592000
    num_agent_steps_trained: 592000
    num_env_steps_sampled: 592000
    num_env_steps_trained: 592000
  custom_metrics: {}
  date: 2022-07-30_15-16-28
  done: false
  episode_len_mean: 442.8
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 442.8
  episode_reward_min: 175.0
  episodes_this_iter: 8
  episodes_total: 1594
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 4.1633365077804595e-18
          cur_lr: 4.999999873689376e-05
          entropy: 0.41709187626838684
          entropy_coeff: 0.0
          kl: 0.007441854104399681
          model: {}
          policy_loss: 0.0009728111326694489
          total_loss: 2.047344207763672
          vf_explained_var: -0.00018066847405862063
          vf_loss

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,148.0,578.398,592000.0,442.8,500.0,175.0,442.8
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,35.0,142.002,140000.0,439.7,500.0,147.0,439.7
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,22.0,87.6956,88000.0,492.75,500.0,326.0,492.75
PPOTrainer_CartPole-v1_c1a07_00006,RUNNING,192.168.1.85:44132,112.0,578.322,448000.0,492.07,500.0,145.0,492.07
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14


Result for PPOTrainer_CartPole-v1_c1a07_00004:
  agent_timesteps_total: 92000
  counters:
    num_agent_steps_sampled: 92000
    num_agent_steps_trained: 92000
    num_env_steps_sampled: 92000
    num_env_steps_trained: 92000
  custom_metrics: {}
  date: 2022-07-30_15-16-30
  done: false
  episode_len_mean: 496.15
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 496.15
  episode_reward_min: 326.0
  episodes_this_iter: 8
  episodes_total: 507
  experiment_id: af77bd27bdfc4736a15cb677c14a8eb0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0011718750465661287
          cur_lr: 4.999999873689376e-05
          entropy: 0.4924822449684143
          entropy_coeff: 0.0
          kl: 0.006576919462531805
          model: {}
          policy_loss: 0.00039813583134673536
          total_loss: 3.242238998413086
          vf_explained_var: 0.04087809845805168
          vf_loss: 3.24183

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,149.0,582.525,596000.0,443.47,500.0,175.0,443.47
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,36.0,146.13,144000.0,417.19,500.0,144.0,417.19
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,24.0,95.0604,96000.0,495.21,500.0,326.0,495.21
PPOTrainer_CartPole-v1_c1a07_00005,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14
PPOTrainer_CartPole-v1_c1a07_00006,TERMINATED,192.168.1.85:44132,113.0,583.802,452000.0,490.5,500.0,145.0,490.5


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 600000
  counters:
    num_agent_steps_sampled: 600000
    num_agent_steps_trained: 600000
    num_env_steps_sampled: 600000
    num_env_steps_trained: 600000
  custom_metrics: {}
  date: 2022-07-30_15-16-36
  done: false
  episode_len_mean: 443.16
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 443.16
  episode_reward_min: 175.0
  episodes_this_iter: 8
  episodes_total: 1610
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 4.1633365077804595e-18
          cur_lr: 4.999999873689376e-05
          entropy: 0.4082312285900116
          entropy_coeff: 0.0
          kl: 0.006764959078282118
          model: {}
          policy_loss: -0.0018983546178787947
          total_loss: 1.8075785636901855
          vf_explained_var: 0.001324493670836091
          vf_loss

[2m[33m(raylet)[0m bash: /home/ibraheem/miniconda3/envs/tf/lib/libtinfo.so.6: no version information available (required by bash)


Result for PPOTrainer_CartPole-v1_c1a07_00004:
  agent_timesteps_total: 100000
  counters:
    num_agent_steps_sampled: 100000
    num_agent_steps_trained: 100000
    num_env_steps_sampled: 100000
    num_env_steps_trained: 100000
  custom_metrics: {}
  date: 2022-07-30_15-16-38
  done: false
  episode_len_mean: 488.65
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 488.65
  episode_reward_min: 211.0
  episodes_this_iter: 10
  episodes_total: 525
  experiment_id: af77bd27bdfc4736a15cb677c14a8eb0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0011718750465661287
          cur_lr: 4.999999873689376e-05
          entropy: 0.4674103856086731
          entropy_coeff: 0.0
          kl: 0.004093213006854057
          model: {}
          policy_loss: -0.00041318568401038647
          total_loss: 2.650038242340088
          vf_explained_var: 0.032063938677310944
          vf_loss:

[2m[36m(PPOTrainer pid=46650)[0m 2022-07-30 15:16:41,491	INFO trainer.py:2332 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
[2m[36m(PPOTrainer pid=46650)[0m 2022-07-30 15:16:41,492	INFO ppo.py:414 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(PPOTrainer pid=46650)[0m 2022-07-30 15:16:41,492	INFO trainer.py:903 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[33m(raylet)[0m bash: /home/ibraheem/miniconda3/envs/tf/lib/libtinfo.so.6: no version information available (required by bash)
[2m[33m(raylet)[0m bash: /home/ibraheem/miniconda3/envs/tf/lib/libtinfo.so.6: no version informatio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,151.0,590.437,604000.0,443.16,500.0,175.0,443.16
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,37.0,150.42,148000.0,405.68,500.0,144.0,405.68
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,25.0,99.1544,100000.0,488.65,500.0,211.0,488.65
PPOTrainer_CartPole-v1_c1a07_00005,RUNNING,192.168.1.85:46650,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14
PPOTrainer_CartPole-v1_c1a07_00006,TERMINATED,192.168.1.85:44132,113.0,583.802,452000.0,490.5,500.0,145.0,490.5


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 608000
  counters:
    num_agent_steps_sampled: 608000
    num_agent_steps_trained: 608000
    num_env_steps_sampled: 608000
    num_env_steps_trained: 608000
  custom_metrics: {}
  date: 2022-07-30_15-16-43
  done: false
  episode_len_mean: 446.04
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 446.04
  episode_reward_min: 185.0
  episodes_this_iter: 9
  episodes_total: 1627
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 4.1633365077804595e-18
          cur_lr: 4.999999873689376e-05
          entropy: 0.39350059628486633
          entropy_coeff: 0.0
          kl: 0.005715787876397371
          model: {}
          policy_loss: -0.0008944347500801086
          total_loss: 2.4235026836395264
          vf_explained_var: -0.33287057280540466
          vf_los

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,152.0,593.381,608000.0,446.04,500.0,185.0,446.04
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,37.0,150.42,148000.0,405.68,500.0,144.0,405.68
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,25.0,99.1544,100000.0,488.65,500.0,211.0,488.65
PPOTrainer_CartPole-v1_c1a07_00005,RUNNING,192.168.1.85:46650,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14
PPOTrainer_CartPole-v1_c1a07_00006,TERMINATED,192.168.1.85:44132,113.0,583.802,452000.0,490.5,500.0,145.0,490.5


Result for PPOTrainer_CartPole-v1_c1a07_00004:
  agent_timesteps_total: 104000
  counters:
    num_agent_steps_sampled: 104000
    num_agent_steps_trained: 104000
    num_env_steps_sampled: 104000
    num_env_steps_trained: 104000
  custom_metrics: {}
  date: 2022-07-30_15-16-41
  done: false
  episode_len_mean: 488.65
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 488.65
  episode_reward_min: 211.0
  episodes_this_iter: 8
  episodes_total: 533
  experiment_id: af77bd27bdfc4736a15cb677c14a8eb0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0005859375232830644
          cur_lr: 4.999999873689376e-05
          entropy: 0.46171095967292786
          entropy_coeff: 0.0
          kl: 0.005771601106971502
          model: {}
          policy_loss: 0.00031915504951030016
          total_loss: 1.8127646446228027
          vf_explained_var: -0.03759381175041199
          vf_loss:



Result for PPOTrainer_CartPole-v1_c1a07_00005:
  agent_timesteps_total: 4000
  counters:
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_env_steps_sampled: 4000
    num_env_steps_trained: 4000
  custom_metrics: {}
  date: 2022-07-30_15-16-53
  done: false
  episode_len_mean: 22.913793103448278
  episode_media: {}
  episode_reward_max: 143.0
  episode_reward_mean: 22.913793103448278
  episode_reward_min: 9.0
  episodes_this_iter: 174
  episodes_total: 174
  experiment_id: 9e74af98a3ab4dd6a288fdc0a842613a
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.668215274810791
          entropy_coeff: 0.0
          kl: 0.02494211308658123
          model: {}
          policy_loss: -0.030583346262574196
          total_loss: 8.849096298217773
          vf_explained_var: 0.004326668102294207
          vf_loss: 8.874691009521484
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,153.0,597.232,612000.0,459.13,500.0,186.0,459.13
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,39.0,158.437,156000.0,405.91,500.0,144.0,405.91
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,27.0,106.881,108000.0,488.65,500.0,211.0,488.65
PPOTrainer_CartPole-v1_c1a07_00005,RUNNING,192.168.1.85:46650,1.0,5.61063,4000.0,22.9138,143.0,9.0,22.9138
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14
PPOTrainer_CartPole-v1_c1a07_00006,TERMINATED,192.168.1.85:44132,113.0,583.802,452000.0,490.5,500.0,145.0,490.5


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 616000
  counters:
    num_agent_steps_sampled: 616000
    num_agent_steps_trained: 616000
    num_env_steps_sampled: 616000
    num_env_steps_trained: 616000
  custom_metrics: {}
  date: 2022-07-30_15-16-56
  done: false
  episode_len_mean: 461.92
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 461.92
  episode_reward_min: 165.0
  episodes_this_iter: 10
  episodes_total: 1646
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.0816682538902298e-18
          cur_lr: 4.999999873689376e-05
          entropy: 0.4035104513168335
          entropy_coeff: 0.0
          kl: 0.007681329734623432
          model: {}
          policy_loss: -0.005705887917429209
          total_loss: 1.7130447626113892
          vf_explained_var: -0.00012259419600013644
          vf_l

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,154.0,602.096,616000.0,461.92,500.0,165.0,461.92
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,40.0,163.234,160000.0,403.44,500.0,144.0,403.44
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,28.0,111.797,112000.0,488.65,500.0,211.0,488.65
PPOTrainer_CartPole-v1_c1a07_00005,RUNNING,192.168.1.85:46650,2.0,11.8875,8000.0,45.01,169.0,10.0,45.01
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14
PPOTrainer_CartPole-v1_c1a07_00006,TERMINATED,192.168.1.85:44132,113.0,583.802,452000.0,490.5,500.0,145.0,490.5


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 624000
  counters:
    num_agent_steps_sampled: 624000
    num_agent_steps_trained: 624000
    num_env_steps_sampled: 624000
    num_env_steps_trained: 624000
  custom_metrics: {}
  date: 2022-07-30_15-17-05
  done: false
  episode_len_mean: 463.2
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 463.2
  episode_reward_min: 165.0
  episodes_this_iter: 10
  episodes_total: 1664
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.0816682538902298e-18
          cur_lr: 4.999999873689376e-05
          entropy: 0.35690492391586304
          entropy_coeff: 0.0
          kl: 0.005084781441837549
          model: {}
          policy_loss: 0.0007750728982500732
          total_loss: 3.120736837387085
          vf_explained_var: -0.18852071464061737
          vf_loss: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,156.0,611.461,624000.0,463.2,500.0,165.0,463.2
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,41.0,167.831,164000.0,418.94,500.0,144.0,418.94
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,29.0,116.193,116000.0,488.65,500.0,211.0,488.65
PPOTrainer_CartPole-v1_c1a07_00005,RUNNING,192.168.1.85:46650,2.0,11.8875,8000.0,45.01,169.0,10.0,45.01
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14
PPOTrainer_CartPole-v1_c1a07_00006,TERMINATED,192.168.1.85:44132,113.0,583.802,452000.0,490.5,500.0,145.0,490.5


Result for PPOTrainer_CartPole-v1_c1a07_00005:
  agent_timesteps_total: 12000
  counters:
    num_agent_steps_sampled: 12000
    num_agent_steps_trained: 12000
    num_env_steps_sampled: 12000
    num_env_steps_trained: 12000
  custom_metrics: {}
  date: 2022-07-30_15-17-05
  done: false
  episode_len_mean: 68.61
  episode_media: {}
  episode_reward_max: 259.0
  episode_reward_mean: 68.61
  episode_reward_min: 10.0
  episodes_this_iter: 38
  episodes_total: 292
  experiment_id: 9e74af98a3ab4dd6a288fdc0a842613a
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.5786438584327698
          entropy_coeff: 0.0
          kl: 0.009085615165531635
          model: {}
          policy_loss: -0.023621734231710434
          total_loss: 9.121074676513672
          vf_explained_var: 0.09157992154359818
          vf_loss: 9.142878532409668
        train: null
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,157.0,617.14,628000.0,463.76,500.0,165.0,463.76
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,42.0,173.13,168000.0,435.03,500.0,144.0,435.03
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,30.0,121.593,120000.0,488.65,500.0,211.0,488.65
PPOTrainer_CartPole-v1_c1a07_00005,RUNNING,192.168.1.85:46650,3.0,18.1841,12000.0,68.61,259.0,10.0,68.61
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14
PPOTrainer_CartPole-v1_c1a07_00006,TERMINATED,192.168.1.85:44132,113.0,583.802,452000.0,490.5,500.0,145.0,490.5


Result for PPOTrainer_CartPole-v1_c1a07_00003:
  agent_timesteps_total: 172000
  counters:
    num_agent_steps_sampled: 172000
    num_agent_steps_trained: 172000
    num_env_steps_sampled: 172000
    num_env_steps_trained: 172000
  custom_metrics: {}
  date: 2022-07-30_15-17-11
  done: false
  episode_len_mean: 441.65
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 441.65
  episode_reward_min: 144.0
  episodes_this_iter: 8
  episodes_total: 685
  experiment_id: ad66a076bc0a453983e0662b54f88cdb
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 3.662109520519152e-05
          cur_lr: 4.999999873689376e-05
          entropy: 0.46779724955558777
          entropy_coeff: 0.0
          kl: 0.00470369728282094
          model: {}
          policy_loss: -0.002547256648540497
          total_loss: 1.8674182891845703
          vf_explained_var: -0.45818376541137695
          vf_loss: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,157.0,617.14,628000.0,463.76,500.0,165.0,463.76
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,44.0,183.456,176000.0,441.65,500.0,144.0,441.65
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,31.0,126.87,124000.0,474.66,500.0,193.0,474.66
PPOTrainer_CartPole-v1_c1a07_00005,RUNNING,192.168.1.85:46650,4.0,24.9493,16000.0,98.52,447.0,11.0,98.52
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14
PPOTrainer_CartPole-v1_c1a07_00006,TERMINATED,192.168.1.85:44132,113.0,583.802,452000.0,490.5,500.0,145.0,490.5


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 632000
  counters:
    num_agent_steps_sampled: 632000
    num_agent_steps_trained: 632000
    num_env_steps_sampled: 632000
    num_env_steps_trained: 632000
  custom_metrics: {}
  date: 2022-07-30_15-17-16
  done: false
  episode_len_mean: 466.32
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 466.32
  episode_reward_min: 165.0
  episodes_this_iter: 8
  episodes_total: 1680
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.0408341269451149e-18
          cur_lr: 4.999999873689376e-05
          entropy: 0.3194078803062439
          entropy_coeff: 0.0
          kl: 0.004986564163118601
          model: {}
          policy_loss: 0.00027958693681284785
          total_loss: 2.3692331314086914
          vf_explained_var: 7.449772965628654e-05
          vf_los

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,159.0,627.544,636000.0,466.32,500.0,165.0,466.32
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,45.0,188.187,180000.0,434.63,500.0,144.0,434.63
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,32.0,132.147,128000.0,475.16,500.0,193.0,475.16
PPOTrainer_CartPole-v1_c1a07_00005,RUNNING,192.168.1.85:46650,5.0,32.2138,20000.0,130.02,487.0,11.0,130.02
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14
PPOTrainer_CartPole-v1_c1a07_00006,TERMINATED,192.168.1.85:44132,113.0,583.802,452000.0,490.5,500.0,145.0,490.5


Result for PPOTrainer_CartPole-v1_c1a07_00004:
  agent_timesteps_total: 132000
  counters:
    num_agent_steps_sampled: 132000
    num_agent_steps_trained: 132000
    num_env_steps_sampled: 132000
    num_env_steps_trained: 132000
  custom_metrics: {}
  date: 2022-07-30_15-17-22
  done: false
  episode_len_mean: 475.16
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 475.16
  episode_reward_min: 193.0
  episodes_this_iter: 8
  episodes_total: 591
  experiment_id: af77bd27bdfc4736a15cb677c14a8eb0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 7.324219041038305e-05
          cur_lr: 4.999999873689376e-05
          entropy: 0.4555370509624481
          entropy_coeff: 0.0
          kl: 0.007031084969639778
          model: {}
          policy_loss: -0.0008025796269066632
          total_loss: 1.814773440361023
          vf_explained_var: -0.1853363811969757
          vf_loss: 1.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,160.0,632.802,640000.0,466.32,500.0,165.0,466.32
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,46.0,193.262,184000.0,433.22,500.0,144.0,433.22
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,33.0,137.435,132000.0,475.16,500.0,193.0,475.16
PPOTrainer_CartPole-v1_c1a07_00005,RUNNING,192.168.1.85:46650,6.0,38.7339,24000.0,165.0,500.0,13.0,165.0
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14
PPOTrainer_CartPole-v1_c1a07_00006,TERMINATED,192.168.1.85:44132,113.0,583.802,452000.0,490.5,500.0,145.0,490.5


Result for PPOTrainer_CartPole-v1_c1a07_00004:
  agent_timesteps_total: 136000
  counters:
    num_agent_steps_sampled: 136000
    num_agent_steps_trained: 136000
    num_env_steps_sampled: 136000
    num_env_steps_trained: 136000
  custom_metrics: {}
  date: 2022-07-30_15-17-27
  done: false
  episode_len_mean: 473.0
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 473.0
  episode_reward_min: 193.0
  episodes_this_iter: 9
  episodes_total: 600
  experiment_id: af77bd27bdfc4736a15cb677c14a8eb0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 7.324219041038305e-05
          cur_lr: 4.999999873689376e-05
          entropy: 0.4577447175979614
          entropy_coeff: 0.0
          kl: 0.0057883416302502155
          model: {}
          policy_loss: 0.00024025389575399458
          total_loss: 1.8066047430038452
          vf_explained_var: -0.14720076322555542
          vf_loss: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,161.0,637.809,644000.0,467.21,500.0,165.0,467.21
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,47.0,197.712,188000.0,446.66,500.0,144.0,446.66
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,35.0,147.269,140000.0,474.19,500.0,193.0,474.19
PPOTrainer_CartPole-v1_c1a07_00005,RUNNING,192.168.1.85:46650,7.0,44.8372,28000.0,198.28,500.0,13.0,198.28
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14
PPOTrainer_CartPole-v1_c1a07_00006,TERMINATED,192.168.1.85:44132,113.0,583.802,452000.0,490.5,500.0,145.0,490.5


Result for PPOTrainer_CartPole-v1_c1a07_00003:
  agent_timesteps_total: 192000
  counters:
    num_agent_steps_sampled: 192000
    num_agent_steps_trained: 192000
    num_env_steps_sampled: 192000
    num_env_steps_trained: 192000
  custom_metrics: {}
  date: 2022-07-30_15-17-36
  done: false
  episode_len_mean: 461.1
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 461.1
  episode_reward_min: 244.0
  episodes_this_iter: 8
  episodes_total: 726
  experiment_id: ad66a076bc0a453983e0662b54f88cdb
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 9.15527380129788e-06
          cur_lr: 4.999999873689376e-05
          entropy: 0.4713704288005829
          entropy_coeff: 0.0
          kl: 0.008019627071917057
          model: {}
          policy_loss: -0.00013310871145222336
          total_loss: 1.4817293882369995
          vf_explained_var: -0.5345367193222046
          vf_loss: 1.4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,162.0,642.464,648000.0,469.78,500.0,165.0,469.78
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,48.0,202.485,192000.0,461.1,500.0,244.0,461.1
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,36.0,152.644,144000.0,460.49,500.0,149.0,460.49
PPOTrainer_CartPole-v1_c1a07_00005,RUNNING,192.168.1.85:46650,7.0,44.8372,28000.0,198.28,500.0,13.0,198.28
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14
PPOTrainer_CartPole-v1_c1a07_00006,TERMINATED,192.168.1.85:44132,113.0,583.802,452000.0,490.5,500.0,145.0,490.5


Result for PPOTrainer_CartPole-v1_c1a07_00005:
  agent_timesteps_total: 32000
  counters:
    num_agent_steps_sampled: 32000
    num_agent_steps_trained: 32000
    num_env_steps_sampled: 32000
    num_env_steps_trained: 32000
  custom_metrics: {}
  date: 2022-07-30_15-17-39
  done: false
  episode_len_mean: 227.23
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 227.23
  episode_reward_min: 13.0
  episodes_this_iter: 11
  episodes_total: 362
  experiment_id: 9e74af98a3ab4dd6a288fdc0a842613a
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.5368067622184753
          entropy_coeff: 0.0
          kl: 0.004409255925565958
          model: {}
          policy_loss: -0.011613540351390839
          total_loss: 9.486872673034668
          vf_explained_var: 0.1777946501970291
          vf_loss: 9.497604370117188
        train: null


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,163.0,647.816,652000.0,469.78,500.0,165.0,469.78
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,49.0,207.613,196000.0,463.78,500.0,244.0,463.78
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,37.0,158.231,148000.0,418.85,500.0,124.0,418.85
PPOTrainer_CartPole-v1_c1a07_00005,RUNNING,192.168.1.85:46650,8.0,51.7479,32000.0,227.23,500.0,13.0,227.23
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14
PPOTrainer_CartPole-v1_c1a07_00006,TERMINATED,192.168.1.85:44132,113.0,583.802,452000.0,490.5,500.0,145.0,490.5


Result for PPOTrainer_CartPole-v1_c1a07_00003:
  agent_timesteps_total: 200000
  counters:
    num_agent_steps_sampled: 200000
    num_agent_steps_trained: 200000
    num_env_steps_sampled: 200000
    num_env_steps_trained: 200000
  custom_metrics: {}
  date: 2022-07-30_15-17-47
  done: false
  episode_len_mean: 464.21
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 464.21
  episode_reward_min: 244.0
  episodes_this_iter: 8
  episodes_total: 743
  experiment_id: ad66a076bc0a453983e0662b54f88cdb
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 9.15527380129788e-06
          cur_lr: 4.999999873689376e-05
          entropy: 0.5022633671760559
          entropy_coeff: 0.0
          kl: 0.0058561405166983604
          model: {}
          policy_loss: 0.0006654041935689747
          total_loss: 1.9663947820663452
          vf_explained_var: -0.29117345809936523
          vf_loss: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,164.0,653.876,656000.0,472.92,500.0,165.0,472.92
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,50.0,213.836,200000.0,464.21,500.0,244.0,464.21
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,37.0,158.231,148000.0,418.85,500.0,124.0,418.85
PPOTrainer_CartPole-v1_c1a07_00005,RUNNING,192.168.1.85:46650,9.0,60.1826,36000.0,259.64,500.0,24.0,259.64
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14
PPOTrainer_CartPole-v1_c1a07_00006,TERMINATED,192.168.1.85:44132,113.0,583.802,452000.0,490.5,500.0,145.0,490.5


Result for PPOTrainer_CartPole-v1_c1a07_00004:
  agent_timesteps_total: 152000
  counters:
    num_agent_steps_sampled: 152000
    num_agent_steps_trained: 152000
    num_env_steps_sampled: 152000
    num_env_steps_trained: 152000
  custom_metrics: {}
  date: 2022-07-30_15-17-49
  done: false
  episode_len_mean: 412.82
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 412.82
  episode_reward_min: 124.0
  episodes_this_iter: 9
  episodes_total: 646
  experiment_id: af77bd27bdfc4736a15cb677c14a8eb0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 3.662109520519152e-05
          cur_lr: 4.999999873689376e-05
          entropy: 0.4720599353313446
          entropy_coeff: 0.0
          kl: 0.0065354942344129086
          model: {}
          policy_loss: -0.0004201996198389679
          total_loss: 2.11544132232666
          vf_explained_var: -0.139058917760849
          vf_loss: 2.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,165.0,659.156,660000.0,481.12,500.0,165.0,481.12
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,51.0,218.835,204000.0,471.38,500.0,244.0,471.38
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,38.0,163.841,152000.0,412.82,500.0,124.0,412.82
PPOTrainer_CartPole-v1_c1a07_00005,RUNNING,192.168.1.85:46650,9.0,60.1826,36000.0,259.64,500.0,24.0,259.64
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14
PPOTrainer_CartPole-v1_c1a07_00006,TERMINATED,192.168.1.85:44132,113.0,583.802,452000.0,490.5,500.0,145.0,490.5


Result for PPOTrainer_CartPole-v1_c1a07_00004:
  agent_timesteps_total: 156000
  counters:
    num_agent_steps_sampled: 156000
    num_agent_steps_trained: 156000
    num_env_steps_sampled: 156000
    num_env_steps_trained: 156000
  custom_metrics: {}
  date: 2022-07-30_15-17-54
  done: false
  episode_len_mean: 412.82
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 412.82
  episode_reward_min: 124.0
  episodes_this_iter: 8
  episodes_total: 654
  experiment_id: af77bd27bdfc4736a15cb677c14a8eb0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 3.662109520519152e-05
          cur_lr: 4.999999873689376e-05
          entropy: 0.4574855864048004
          entropy_coeff: 0.0
          kl: 0.006055512931197882
          model: {}
          policy_loss: 0.0009198089828714728
          total_loss: 1.626015305519104
          vf_explained_var: -0.3379530608654022
          vf_loss: 1.6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,166.0,664.45,664000.0,486.55,500.0,174.0,486.55
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,52.0,224.238,208000.0,482.16,500.0,244.0,482.16
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,39.0,168.864,156000.0,412.82,500.0,124.0,412.82
PPOTrainer_CartPole-v1_c1a07_00005,RUNNING,192.168.1.85:46650,10.0,67.1946,40000.0,295.66,500.0,38.0,295.66
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14
PPOTrainer_CartPole-v1_c1a07_00006,TERMINATED,192.168.1.85:44132,113.0,583.802,452000.0,490.5,500.0,145.0,490.5


Result for PPOTrainer_CartPole-v1_c1a07_00004:
  agent_timesteps_total: 160000
  counters:
    num_agent_steps_sampled: 160000
    num_agent_steps_trained: 160000
    num_env_steps_sampled: 160000
    num_env_steps_trained: 160000
  custom_metrics: {}
  date: 2022-07-30_15-17-59
  done: false
  episode_len_mean: 411.24
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 411.24
  episode_reward_min: 124.0
  episodes_this_iter: 8
  episodes_total: 662
  experiment_id: af77bd27bdfc4736a15cb677c14a8eb0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 3.662109520519152e-05
          cur_lr: 4.999999873689376e-05
          entropy: 0.498852014541626
          entropy_coeff: 0.0
          kl: 0.005687607917934656
          model: {}
          policy_loss: -0.0022445176728069782
          total_loss: 1.8327749967575073
          vf_explained_var: -0.4746273458003998
          vf_loss: 1.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,167.0,669.002,668000.0,486.55,500.0,174.0,486.55
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,54.0,233.519,216000.0,483.08,500.0,244.0,483.08
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,41.0,178.52,164000.0,414.54,500.0,121.0,414.54
PPOTrainer_CartPole-v1_c1a07_00005,RUNNING,192.168.1.85:46650,11.0,74.1327,44000.0,322.68,500.0,55.0,322.68
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14
PPOTrainer_CartPole-v1_c1a07_00006,TERMINATED,192.168.1.85:44132,113.0,583.802,452000.0,490.5,500.0,145.0,490.5


Result for PPOTrainer_CartPole-v1_c1a07_00005:
  agent_timesteps_total: 48000
  counters:
    num_agent_steps_sampled: 48000
    num_agent_steps_trained: 48000
    num_env_steps_sampled: 48000
    num_env_steps_trained: 48000
  custom_metrics: {}
  date: 2022-07-30_15-18-08
  done: false
  episode_len_mean: 349.75
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 349.75
  episode_reward_min: 93.0
  episodes_this_iter: 8
  episodes_total: 397
  experiment_id: 9e74af98a3ab4dd6a288fdc0a842613a
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.5397251844406128
          entropy_coeff: 0.0
          kl: 0.0015529332449659705
          model: {}
          policy_loss: -0.006321358494460583
          total_loss: 9.454010963439941
          vf_explained_var: 0.013754262588918209
          vf_loss: 9.46002197265625
        train: null

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,168.0,673.935,672000.0,493.13,500.0,228.0,493.13
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,55.0,238.482,220000.0,476.42,500.0,244.0,476.42
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,42.0,183.219,168000.0,418.0,500.0,121.0,418.0
PPOTrainer_CartPole-v1_c1a07_00005,RUNNING,192.168.1.85:46650,12.0,80.5721,48000.0,349.75,500.0,93.0,349.75
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14
PPOTrainer_CartPole-v1_c1a07_00006,TERMINATED,192.168.1.85:44132,113.0,583.802,452000.0,490.5,500.0,145.0,490.5


Result for PPOTrainer_CartPole-v1_c1a07_00005:
  agent_timesteps_total: 52000
  counters:
    num_agent_steps_sampled: 52000
    num_agent_steps_trained: 52000
    num_env_steps_sampled: 52000
    num_env_steps_trained: 52000
  custom_metrics: {}
  date: 2022-07-30_15-18-15
  done: false
  episode_len_mean: 371.78
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 371.78
  episode_reward_min: 93.0
  episodes_this_iter: 10
  episodes_total: 407
  experiment_id: 9e74af98a3ab4dd6a288fdc0a842613a
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.546025276184082
          entropy_coeff: 0.0
          kl: 0.00314515084028244
          model: {}
          policy_loss: -0.006761712953448296
          total_loss: 9.402941703796387
          vf_explained_var: 0.008869292214512825
          vf_loss: 9.409074783325195
        train: null


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,169.0,678.903,676000.0,497.94,500.0,294.0,497.94
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,56.0,243.322,224000.0,475.13,500.0,244.0,475.13
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,44.0,192.517,176000.0,420.28,500.0,121.0,420.28
PPOTrainer_CartPole-v1_c1a07_00005,RUNNING,192.168.1.85:46650,13.0,87.0089,52000.0,371.78,500.0,93.0,371.78
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14
PPOTrainer_CartPole-v1_c1a07_00006,TERMINATED,192.168.1.85:44132,113.0,583.802,452000.0,490.5,500.0,145.0,490.5


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 680000
  counters:
    num_agent_steps_sampled: 680000
    num_agent_steps_trained: 680000
    num_env_steps_sampled: 680000
    num_env_steps_trained: 680000
  custom_metrics: {}
  date: 2022-07-30_15-18-18
  done: false
  episode_len_mean: 497.94
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 497.94
  episode_reward_min: 294.0
  episodes_this_iter: 8
  episodes_total: 1777
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.0164395770948388e-21
          cur_lr: 4.999999873689376e-05
          entropy: 0.24849626421928406
          entropy_coeff: 0.0
          kl: 0.002160080010071397
          model: {}
          policy_loss: 0.003594484180212021
          total_loss: 2.312063455581665
          vf_explained_var: 0.0016072152648121119
          vf_loss:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,171.0,688.322,684000.0,497.94,500.0,294.0,497.94
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,57.0,248.311,228000.0,478.8,500.0,244.0,478.8
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,45.0,197.332,180000.0,421.03,500.0,121.0,421.03
PPOTrainer_CartPole-v1_c1a07_00005,RUNNING,192.168.1.85:46650,14.0,93.7909,56000.0,385.85,500.0,93.0,385.85
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14
PPOTrainer_CartPole-v1_c1a07_00006,TERMINATED,192.168.1.85:44132,113.0,583.802,452000.0,490.5,500.0,145.0,490.5


Result for PPOTrainer_CartPole-v1_c1a07_00003:
  agent_timesteps_total: 232000
  counters:
    num_agent_steps_sampled: 232000
    num_agent_steps_trained: 232000
    num_env_steps_sampled: 232000
    num_env_steps_trained: 232000
  custom_metrics: {}
  date: 2022-07-30_15-18-27
  done: false
  episode_len_mean: 481.49
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 481.49
  episode_reward_min: 244.0
  episodes_this_iter: 8
  episodes_total: 811
  experiment_id: ad66a076bc0a453983e0662b54f88cdb
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 9.15527380129788e-06
          cur_lr: 4.999999873689376e-05
          entropy: 0.47720780968666077
          entropy_coeff: 0.0
          kl: 0.007656562607735395
          model: {}
          policy_loss: -0.00039016996743157506
          total_loss: 2.015739917755127
          vf_explained_var: -0.4082774817943573
          vf_loss: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,172.0,693.831,688000.0,497.94,500.0,294.0,497.94
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,58.0,253.586,232000.0,481.49,500.0,244.0,481.49
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,46.0,202.513,184000.0,426.54,500.0,121.0,426.54
PPOTrainer_CartPole-v1_c1a07_00005,RUNNING,192.168.1.85:46650,14.0,93.7909,56000.0,385.85,500.0,93.0,385.85
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14
PPOTrainer_CartPole-v1_c1a07_00006,TERMINATED,192.168.1.85:44132,113.0,583.802,452000.0,490.5,500.0,145.0,490.5


Result for PPOTrainer_CartPole-v1_c1a07_00005:
  agent_timesteps_total: 60000
  counters:
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_env_steps_sampled: 60000
    num_env_steps_trained: 60000
  custom_metrics: {}
  date: 2022-07-30_15-18-29
  done: false
  episode_len_mean: 405.85
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 405.85
  episode_reward_min: 93.0
  episodes_this_iter: 8
  episodes_total: 425
  experiment_id: 9e74af98a3ab4dd6a288fdc0a842613a
  hostname: meeharbi
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.533174455165863
          entropy_coeff: 0.0
          kl: 0.004406000021845102
          model: {}
          policy_loss: -0.005515731405466795
          total_loss: 9.247832298278809
          vf_explained_var: 0.0546613372862339
          vf_loss: 9.25246524810791
        train: null
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,172.0,693.831,688000.0,497.94,500.0,294.0,497.94
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,59.0,259.119,236000.0,481.49,500.0,244.0,481.49
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,47.0,208.163,188000.0,438.13,500.0,121.0,438.13
PPOTrainer_CartPole-v1_c1a07_00005,RUNNING,192.168.1.85:46650,15.0,100.837,60000.0,405.85,500.0,93.0,405.85
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14
PPOTrainer_CartPole-v1_c1a07_00006,TERMINATED,192.168.1.85:44132,113.0,583.802,452000.0,490.5,500.0,145.0,490.5


Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 692000
  counters:
    num_agent_steps_sampled: 692000
    num_agent_steps_trained: 692000
    num_env_steps_sampled: 692000
    num_env_steps_trained: 692000
  custom_metrics: {}
  date: 2022-07-30_15-18-34
  done: false
  episode_len_mean: 500.0
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 500.0
  episode_reward_min: 500.0
  episodes_this_iter: 8
  episodes_total: 1801
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.2705494713685484e-22
          cur_lr: 4.999999873689376e-05
          entropy: 0.252058207988739
          entropy_coeff: 0.0
          kl: 0.006015613675117493
          model: {}
          policy_loss: 0.0010920778149738908
          total_loss: 2.309562921524048
          vf_explained_var: -0.00037834446993656456
          vf_loss: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,174.0,704.01,696000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,60.0,264.101,240000.0,479.99,500.0,244.0,479.99
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,47.0,208.163,188000.0,438.13,500.0,121.0,438.13
PPOTrainer_CartPole-v1_c1a07_00005,RUNNING,192.168.1.85:46650,16.0,107.99,64000.0,419.33,500.0,93.0,419.33
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14
PPOTrainer_CartPole-v1_c1a07_00006,TERMINATED,192.168.1.85:44132,113.0,583.802,452000.0,490.5,500.0,145.0,490.5


Result for PPOTrainer_CartPole-v1_c1a07_00004:
  agent_timesteps_total: 192000
  counters:
    num_agent_steps_sampled: 192000
    num_agent_steps_trained: 192000
    num_env_steps_sampled: 192000
    num_env_steps_trained: 192000
  custom_metrics: {}
  date: 2022-07-30_15-18-39
  done: false
  episode_len_mean: 458.32
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 458.32
  episode_reward_min: 121.0
  episodes_this_iter: 8
  episodes_total: 728
  experiment_id: af77bd27bdfc4736a15cb677c14a8eb0
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.28881845032447e-06
          cur_lr: 4.999999873689376e-05
          entropy: 0.4733930826187134
          entropy_coeff: 0.0
          kl: 0.007644186727702618
          model: {}
          policy_loss: -0.0033577047288417816
          total_loss: 1.8826565742492676
          vf_explained_var: -0.3204238712787628
          vf_loss: 1.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,175.0,709.172,700000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,61.0,268.833,244000.0,475.07,500.0,244.0,475.07
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,49.0,218.2,196000.0,481.58,500.0,121.0,481.58
PPOTrainer_CartPole-v1_c1a07_00005,RUNNING,192.168.1.85:46650,17.0,114.7,68000.0,427.19,500.0,93.0,427.19
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14
PPOTrainer_CartPole-v1_c1a07_00006,TERMINATED,192.168.1.85:44132,113.0,583.802,452000.0,490.5,500.0,145.0,490.5


Result for PPOTrainer_CartPole-v1_c1a07_00003:
  agent_timesteps_total: 248000
  counters:
    num_agent_steps_sampled: 248000
    num_agent_steps_trained: 248000
    num_env_steps_sampled: 248000
    num_env_steps_trained: 248000
  custom_metrics: {}
  date: 2022-07-30_15-18-48
  done: false
  episode_len_mean: 466.09
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 466.09
  episode_reward_min: 244.0
  episodes_this_iter: 10
  episodes_total: 847
  experiment_id: ad66a076bc0a453983e0662b54f88cdb
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 9.15527380129788e-06
          cur_lr: 4.999999873689376e-05
          entropy: 0.46295011043548584
          entropy_coeff: 0.0
          kl: 0.009835444390773773
          model: {}
          policy_loss: -0.01853766106069088
          total_loss: 2.216845750808716
          vf_explained_var: -0.4137084186077118
          vf_loss: 2.2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,176.0,714.278,704000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,62.0,273.927,248000.0,466.09,500.0,244.0,466.09
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,50.0,223.691,200000.0,484.37,500.0,121.0,484.37
PPOTrainer_CartPole-v1_c1a07_00005,RUNNING,192.168.1.85:46650,18.0,121.66,72000.0,439.57,500.0,93.0,439.57
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14
PPOTrainer_CartPole-v1_c1a07_00006,TERMINATED,192.168.1.85:44132,113.0,583.802,452000.0,490.5,500.0,145.0,490.5


Result for PPOTrainer_CartPole-v1_c1a07_00003:
  agent_timesteps_total: 252000
  counters:
    num_agent_steps_sampled: 252000
    num_agent_steps_trained: 252000
    num_env_steps_sampled: 252000
    num_env_steps_trained: 252000
  custom_metrics: {}
  date: 2022-07-30_15-18-53
  done: false
  episode_len_mean: 464.51
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 464.51
  episode_reward_min: 244.0
  episodes_this_iter: 8
  episodes_total: 855
  experiment_id: ad66a076bc0a453983e0662b54f88cdb
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 9.15527380129788e-06
          cur_lr: 4.999999873689376e-05
          entropy: 0.4841488003730774
          entropy_coeff: 0.0
          kl: 0.004467551130801439
          model: {}
          policy_loss: 0.0013171267928555608
          total_loss: 2.0426487922668457
          vf_explained_var: -0.4676682949066162
          vf_loss: 2.0



Result for PPOTrainer_CartPole-v1_c1a07_00000:
  agent_timesteps_total: 708000
  counters:
    num_agent_steps_sampled: 708000
    num_agent_steps_trained: 708000
    num_env_steps_sampled: 708000
    num_env_steps_trained: 708000
  custom_metrics: {}
  date: 2022-07-30_15-18-54
  done: false
  episode_len_mean: 500.0
  episode_media: {}
  episode_reward_max: 500.0
  episode_reward_mean: 500.0
  episode_reward_min: 500.0
  episodes_this_iter: 8
  episodes_total: 1833
  experiment_id: d0d232835bcc4b4481f32af584f23b26
  hostname: meeharbi
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.5881868392106856e-23
          cur_lr: 4.999999873689376e-05
          entropy: 0.2847386598587036
          entropy_coeff: 0.0
          kl: 0.0029939310625195503
          model: {}
          policy_loss: 0.002603460568934679
          total_loss: 2.3110713958740234
          vf_explained_var: 0.0
          vf_loss: 2.3084676265716553

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_CartPole-v1_c1a07_00000,RUNNING,192.168.1.85:44312,177.0,719.314,708000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00003,RUNNING,192.168.1.85:46041,63.0,279.169,252000.0,464.51,500.0,244.0,464.51
PPOTrainer_CartPole-v1_c1a07_00004,RUNNING,192.168.1.85:46303,50.0,223.691,200000.0,484.37,500.0,121.0,484.37
PPOTrainer_CartPole-v1_c1a07_00005,RUNNING,192.168.1.85:46650,18.0,121.66,72000.0,439.57,500.0,93.0,439.57
PPOTrainer_CartPole-v1_c1a07_00007,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00008,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00009,PENDING,,,,,,,,
PPOTrainer_CartPole-v1_c1a07_00001,TERMINATED,192.168.1.85:44450,111.0,428.928,444000.0,500.0,500.0,500.0,500.0
PPOTrainer_CartPole-v1_c1a07_00002,TERMINATED,192.168.1.85:44601,125.0,482.499,500000.0,474.14,500.0,218.0,474.14
PPOTrainer_CartPole-v1_c1a07_00006,TERMINATED,192.168.1.85:44132,113.0,583.802,452000.0,490.5,500.0,145.0,490.5


2022-07-30 15:18:59,086	ERROR tune.py:743 -- Trials did not complete: [PPOTrainer_CartPole-v1_c1a07_00000, PPOTrainer_CartPole-v1_c1a07_00003, PPOTrainer_CartPole-v1_c1a07_00004, PPOTrainer_CartPole-v1_c1a07_00005, PPOTrainer_CartPole-v1_c1a07_00007, PPOTrainer_CartPole-v1_c1a07_00008, PPOTrainer_CartPole-v1_c1a07_00009]
2022-07-30 15:18:59,087	INFO tune.py:747 -- Total run time: 771.85 seconds (767.76 seconds for the tuning loop).


[PPOTrainer_CartPole-v1_c1a07_00000,
 PPOTrainer_CartPole-v1_c1a07_00001,
 PPOTrainer_CartPole-v1_c1a07_00002,
 PPOTrainer_CartPole-v1_c1a07_00003,
 PPOTrainer_CartPole-v1_c1a07_00004,
 PPOTrainer_CartPole-v1_c1a07_00005,
 PPOTrainer_CartPole-v1_c1a07_00006,
 PPOTrainer_CartPole-v1_c1a07_00007,
 PPOTrainer_CartPole-v1_c1a07_00008,
 PPOTrainer_CartPole-v1_c1a07_00009]