# Challenge 2 : Lunar Lander

## Imports :

In [1]:
import os

# Reinforcement Learning
import gym

# Distributed Learning
import ray
from ray.rllib.agents.ppo import PPOTrainer

# Display
from gym.wrappers.monitoring.video_recorder import VideoRecorder
from IPython.display import Video

# Optional
import warnings
warnings.filterwarnings('ignore')

## Context :

In [2]:
env = gym.make("LunarLander-v2")

In [3]:
env.action_space

Discrete(4)

In [4]:
env.reset()

array([ 0.0015523 ,  1.4024421 ,  0.15720968, -0.3767962 , -0.00179188,
       -0.03561032,  0.        ,  0.        ], dtype=float32)

## Random Action :

In [5]:
def check_video_folder_sanity(path, video_name):
    video_path = path + video_name
    os.makedirs(path, exist_ok=True)
    if os.path.exists(video_path + ".mp4"):
        os.remove(video_path + ".mp4")
    if os.path.exists(video_path + ".meta.json"):
        os.remove(video_path + ".meta.json")
    return video_path

In [6]:
video_name = "before_training"
path = "videos/lunar_lander/"
random_seed = 42

video_path = check_video_folder_sanity(path, video_name)
    
env = gym.make("LunarLander-v2")
env.action_space.seed(random_seed)
video = VideoRecorder(env, video_path + ".mp4", enabled=video_name is not None)

env.reset()
for i in range(200):
    env.render()
    video.capture_frame()
    observation, reward, done, info = env.step(env.action_space.sample())
    #print("step", i, observation, reward, done, info)

video.close()
env.close()

In [7]:
Video(video_path + ".mp4")

## Train an agent :

In [8]:
os.cpu_count()

12

In [9]:
config = {
    "env": "LunarLander-v2",
    # “tf” to use tensorflow, "torch" to use pytorch
    "framework": "tf",
    "model": {
        "fcnet_hiddens": [32],
        "fcnet_activation": "linear",
    },
}
stop = {"episode_reward_mean": 200}
ray.shutdown()
ray.init(num_cpus=4, include_dashboard=False,
         ignore_reinit_error=True, log_to_driver=False)
# Start Training 
analysis = ray.tune.run("PPO", config=config,
                        stop=stop, checkpoint_at_end=True)

2022-07-23 21:08:56,173	ERROR syncer.py:147 -- Log sync requires rsync to be installed.


Trial name,status,loc
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104


Trial name,status,loc
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104


Trial name,status,loc
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 4000
  counters:
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_env_steps_sampled: 4000
    num_env_steps_trained: 4000
  custom_metrics: {}
  date: 2022-07-23_21-09-37
  done: false
  episode_len_mean: 94.11904761904762
  episode_media: {}
  episode_reward_max: -47.270826812156216
  episode_reward_mean: -186.30786380875594
  episode_reward_min: -490.3767149990626
  episodes_this_iter: 42
  episodes_total: 42
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 1.383880376815796
          entropy_coeff: 0.0
          kl: 0.002605424029752612
          model: {}
          policy_loss: 0.001994385151192546
          total_loss: 9.649328231811523
          vf_explained_var: -0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,1,14.7636,4000,-186.308,-47.2708,-490.377,94.119


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,1,14.7636,4000,-186.308,-47.2708,-490.377,94.119


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,1,14.7636,4000,-186.308,-47.2708,-490.377,94.119


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 8000
  counters:
    num_agent_steps_sampled: 8000
    num_agent_steps_trained: 8000
    num_env_steps_sampled: 8000
    num_env_steps_trained: 8000
  custom_metrics: {}
  date: 2022-07-23_21-09-52
  done: false
  episode_len_mean: 95.0952380952381
  episode_media: {}
  episode_reward_max: 35.127822441849986
  episode_reward_mean: -188.70900978420056
  episode_reward_min: -495.8865334128032
  episodes_this_iter: 42
  episodes_total: 84
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.10000000149011612
          cur_lr: 4.999999873689376e-05
          entropy: 1.3779852390289307
          entropy_coeff: 0.0
          kl: 0.008093826472759247
          model: {}
          policy_loss: -0.0075630443170666695
          total_loss: 9.754026412963867
          vf_explained_var: -

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,2,30.1509,8000,-188.709,35.1278,-495.887,95.0952


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,2,30.1509,8000,-188.709,35.1278,-495.887,95.0952


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,2,30.1509,8000,-188.709,35.1278,-495.887,95.0952


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 12000
  counters:
    num_agent_steps_sampled: 12000
    num_agent_steps_trained: 12000
    num_env_steps_sampled: 12000
    num_env_steps_trained: 12000
  custom_metrics: {}
  date: 2022-07-23_21-10-08
  done: false
  episode_len_mean: 92.4
  episode_media: {}
  episode_reward_max: 35.127822441849986
  episode_reward_mean: -185.38282388693136
  episode_reward_min: -533.830136830341
  episodes_this_iter: 43
  episodes_total: 127
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.10000000149011612
          cur_lr: 4.999999873689376e-05
          entropy: 1.377658486366272
          entropy_coeff: 0.0
          kl: 0.004631415009498596
          model: {}
          policy_loss: 0.004894576035439968
          total_loss: 9.720930099487305
          vf_explained_var: -0.00138375

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,3,45.4208,12000,-185.383,35.1278,-533.83,92.4


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,3,45.4208,12000,-185.383,35.1278,-533.83,92.4


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,3,45.4208,12000,-185.383,35.1278,-533.83,92.4


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 16000
  counters:
    num_agent_steps_sampled: 16000
    num_agent_steps_trained: 16000
    num_env_steps_sampled: 16000
    num_env_steps_trained: 16000
  custom_metrics: {}
  date: 2022-07-23_21-10-23
  done: false
  episode_len_mean: 89.03
  episode_media: {}
  episode_reward_max: 28.99235089254779
  episode_reward_mean: -158.7890354720979
  episode_reward_min: -533.830136830341
  episodes_this_iter: 43
  episodes_total: 170
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.05000000074505806
          cur_lr: 4.999999873689376e-05
          entropy: 1.377145767211914
          entropy_coeff: 0.0
          kl: 0.002545403316617012
          model: {}
          policy_loss: 0.000899725069757551
          total_loss: 9.745716094970703
          vf_explained_var: -0.003708984

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,4,60.873,16000,-158.789,28.9924,-533.83,89.03


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,4,60.873,16000,-158.789,28.9924,-533.83,89.03


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,4,60.873,16000,-158.789,28.9924,-533.83,89.03


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 20000
  counters:
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_env_steps_sampled: 20000
    num_env_steps_trained: 20000
  custom_metrics: {}
  date: 2022-07-23_21-10-40
  done: false
  episode_len_mean: 98.96
  episode_media: {}
  episode_reward_max: 76.17136337250976
  episode_reward_mean: -135.33715547174958
  episode_reward_min: -478.76080979954077
  episodes_this_iter: 36
  episodes_total: 206
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.02500000037252903
          cur_lr: 4.999999873689376e-05
          entropy: 1.3684638738632202
          entropy_coeff: 0.0
          kl: 0.0030034719966351986
          model: {}
          policy_loss: 0.0024036129470914602
          total_loss: 9.416964530944824
          vf_explained_var: 0.0006

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,5,77.2296,20000,-135.337,76.1714,-478.761,98.96


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,5,77.2296,20000,-135.337,76.1714,-478.761,98.96


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,5,77.2296,20000,-135.337,76.1714,-478.761,98.96


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 24000
  counters:
    num_agent_steps_sampled: 24000
    num_agent_steps_trained: 24000
    num_env_steps_sampled: 24000
    num_env_steps_trained: 24000
  custom_metrics: {}
  date: 2022-07-23_21-10-55
  done: false
  episode_len_mean: 104.13
  episode_media: {}
  episode_reward_max: 76.17136337250976
  episode_reward_mean: -129.60599412613112
  episode_reward_min: -338.17309702320136
  episodes_this_iter: 38
  episodes_total: 244
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.012500000186264515
          cur_lr: 4.999999873689376e-05
          entropy: 1.3640488386154175
          entropy_coeff: 0.0
          kl: 0.004020067397505045
          model: {}
          policy_loss: 0.007771192584186792
          total_loss: 9.744162559509277
          vf_explained_var: -0.005

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,6,92.8477,24000,-129.606,76.1714,-338.173,104.13


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,6,92.8477,24000,-129.606,76.1714,-338.173,104.13


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 28000
  counters:
    num_agent_steps_sampled: 28000
    num_agent_steps_trained: 28000
    num_env_steps_sampled: 28000
    num_env_steps_trained: 28000
  custom_metrics: {}
  date: 2022-07-23_21-11-10
  done: false
  episode_len_mean: 107.01
  episode_media: {}
  episode_reward_max: 76.17136337250976
  episode_reward_mean: -124.78359336873253
  episode_reward_min: -342.3633048649298
  episodes_this_iter: 43
  episodes_total: 287
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0062500000931322575
          cur_lr: 4.999999873689376e-05
          entropy: 1.3584671020507812
          entropy_coeff: 0.0
          kl: 0.0023306726943701506
          model: {}
          policy_loss: 0.0028792659286409616
          total_loss: 9.705246925354004
          vf_explained_var: -0.0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,7,108.074,28000,-124.784,76.1714,-342.363,107.01


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,7,108.074,28000,-124.784,76.1714,-342.363,107.01


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,7,108.074,28000,-124.784,76.1714,-342.363,107.01


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,7,108.074,28000,-124.784,76.1714,-342.363,107.01


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 32000
  counters:
    num_agent_steps_sampled: 32000
    num_agent_steps_trained: 32000
    num_env_steps_sampled: 32000
    num_env_steps_trained: 32000
  custom_metrics: {}
  date: 2022-07-23_21-11-27
  done: false
  episode_len_mean: 94.39
  episode_media: {}
  episode_reward_max: 22.16821564176675
  episode_reward_mean: -117.8315390426373
  episode_reward_min: -342.3633048649298
  episodes_this_iter: 43
  episodes_total: 330
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0031250000465661287
          cur_lr: 4.999999873689376e-05
          entropy: 1.3554353713989258
          entropy_coeff: 0.0
          kl: 0.004343204665929079
          model: {}
          policy_loss: 0.0038789501413702965
          total_loss: 9.672109603881836
          vf_explained_var: -0.0084

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,8,124.565,32000,-117.832,22.1682,-342.363,94.39


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,8,124.565,32000,-117.832,22.1682,-342.363,94.39


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,8,124.565,32000,-117.832,22.1682,-342.363,94.39


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 36000
  counters:
    num_agent_steps_sampled: 36000
    num_agent_steps_trained: 36000
    num_env_steps_sampled: 36000
    num_env_steps_trained: 36000
  custom_metrics: {}
  date: 2022-07-23_21-11-44
  done: false
  episode_len_mean: 97.11
  episode_media: {}
  episode_reward_max: 22.16821564176675
  episode_reward_mean: -108.82520078707215
  episode_reward_min: -342.3633048649298
  episodes_this_iter: 38
  episodes_total: 368
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0015625000232830644
          cur_lr: 4.999999873689376e-05
          entropy: 1.3387322425842285
          entropy_coeff: 0.0
          kl: 0.005982272792607546
          model: {}
          policy_loss: 0.002818656386807561
          total_loss: 9.578492164611816
          vf_explained_var: -0.0081

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,9,141.161,36000,-108.825,22.1682,-342.363,97.11


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,9,141.161,36000,-108.825,22.1682,-342.363,97.11


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,9,141.161,36000,-108.825,22.1682,-342.363,97.11


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 40000
  counters:
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_env_steps_sampled: 40000
    num_env_steps_trained: 40000
  custom_metrics: {}
  date: 2022-07-23_21-12-00
  done: false
  episode_len_mean: 103.52
  episode_media: {}
  episode_reward_max: 22.16821564176675
  episode_reward_mean: -111.63356822412239
  episode_reward_min: -282.60881287012137
  episodes_this_iter: 37
  episodes_total: 405
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0015625000232830644
          cur_lr: 4.999999873689376e-05
          entropy: 1.3239283561706543
          entropy_coeff: 0.0
          kl: 0.0038878414779901505
          model: {}
          policy_loss: -0.01507867407053709
          total_loss: 9.511441230773926
          vf_explained_var: -0.0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,10,157.146,40000,-111.634,22.1682,-282.609,103.52


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,10,157.146,40000,-111.634,22.1682,-282.609,103.52


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,10,157.146,40000,-111.634,22.1682,-282.609,103.52


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 44000
  counters:
    num_agent_steps_sampled: 44000
    num_agent_steps_trained: 44000
    num_env_steps_sampled: 44000
    num_env_steps_trained: 44000
  custom_metrics: {}
  date: 2022-07-23_21-12-15
  done: false
  episode_len_mean: 108.97
  episode_media: {}
  episode_reward_max: 2.983102994899312
  episode_reward_mean: -106.86214347067818
  episode_reward_min: -257.63896929391547
  episodes_this_iter: 36
  episodes_total: 441
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0007812500116415322
          cur_lr: 4.999999873689376e-05
          entropy: 1.306004524230957
          entropy_coeff: 0.0
          kl: 0.006104701664298773
          model: {}
          policy_loss: -0.0049924240447580814
          total_loss: 9.376426696777344
          vf_explained_var: -0.0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,11,172.824,44000,-106.862,2.9831,-257.639,108.97


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,11,172.824,44000,-106.862,2.9831,-257.639,108.97


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,11,172.824,44000,-106.862,2.9831,-257.639,108.97


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 48000
  counters:
    num_agent_steps_sampled: 48000
    num_agent_steps_trained: 48000
    num_env_steps_sampled: 48000
    num_env_steps_trained: 48000
  custom_metrics: {}
  date: 2022-07-23_21-12-31
  done: false
  episode_len_mean: 114.8
  episode_media: {}
  episode_reward_max: 15.764519567913794
  episode_reward_mean: -102.13465473802137
  episode_reward_min: -269.00399461529605
  episodes_this_iter: 33
  episodes_total: 474
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0007812500116415322
          cur_lr: 4.999999873689376e-05
          entropy: 1.2690484523773193
          entropy_coeff: 0.0
          kl: 0.006047194357961416
          model: {}
          policy_loss: 0.00013367283099796623
          total_loss: 9.659345626831055
          vf_explained_var: -0.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,12,188.547,48000,-102.135,15.7645,-269.004,114.8


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,12,188.547,48000,-102.135,15.7645,-269.004,114.8


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,12,188.547,48000,-102.135,15.7645,-269.004,114.8


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 52000
  counters:
    num_agent_steps_sampled: 52000
    num_agent_steps_trained: 52000
    num_env_steps_sampled: 52000
    num_env_steps_trained: 52000
  custom_metrics: {}
  date: 2022-07-23_21-12-47
  done: false
  episode_len_mean: 118.74
  episode_media: {}
  episode_reward_max: 22.350294555580845
  episode_reward_mean: -90.49308724198107
  episode_reward_min: -269.00399461529605
  episodes_this_iter: 32
  episodes_total: 506
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0007812500116415322
          cur_lr: 4.999999873689376e-05
          entropy: 1.2642009258270264
          entropy_coeff: 0.0
          kl: 0.0007468675612471998
          model: {}
          policy_loss: 0.0067871613427996635
          total_loss: 9.33940601348877
          vf_explained_var: -0.0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,13,204.593,52000,-90.4931,22.3503,-269.004,118.74


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,13,204.593,52000,-90.4931,22.3503,-269.004,118.74


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,13,204.593,52000,-90.4931,22.3503,-269.004,118.74


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 56000
  counters:
    num_agent_steps_sampled: 56000
    num_agent_steps_trained: 56000
    num_env_steps_sampled: 56000
    num_env_steps_trained: 56000
  custom_metrics: {}
  date: 2022-07-23_21-13-03
  done: false
  episode_len_mean: 123.7
  episode_media: {}
  episode_reward_max: 40.440089683957524
  episode_reward_mean: -84.63788598616833
  episode_reward_min: -269.00399461529605
  episodes_this_iter: 31
  episodes_total: 537
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0003906250058207661
          cur_lr: 4.999999873689376e-05
          entropy: 1.215530276298523
          entropy_coeff: 0.0
          kl: 0.003127786098048091
          model: {}
          policy_loss: 0.005213839467614889
          total_loss: 9.276552200317383
          vf_explained_var: -0.0206

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,14,220.587,56000,-84.6379,40.4401,-269.004,123.7


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,14,220.587,56000,-84.6379,40.4401,-269.004,123.7


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,14,220.587,56000,-84.6379,40.4401,-269.004,123.7


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 60000
  counters:
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_env_steps_sampled: 60000
    num_env_steps_trained: 60000
  custom_metrics: {}
  date: 2022-07-23_21-13-21
  done: false
  episode_len_mean: 140.91
  episode_media: {}
  episode_reward_max: 40.440089683957524
  episode_reward_mean: -79.24720161242391
  episode_reward_min: -229.56029549714083
  episodes_this_iter: 20
  episodes_total: 557
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.00019531250291038305
          cur_lr: 4.999999873689376e-05
          entropy: 1.2143245935440063
          entropy_coeff: 0.0
          kl: 0.001008270657621324
          model: {}
          policy_loss: -0.0008889363380149007
          total_loss: 9.318926811218262
          vf_explained_var: -0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,15,237.972,60000,-79.2472,40.4401,-229.56,140.91


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,15,237.972,60000,-79.2472,40.4401,-229.56,140.91


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,15,237.972,60000,-79.2472,40.4401,-229.56,140.91


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 64000
  counters:
    num_agent_steps_sampled: 64000
    num_agent_steps_trained: 64000
    num_env_steps_sampled: 64000
    num_env_steps_trained: 64000
  custom_metrics: {}
  date: 2022-07-23_21-13-37
  done: false
  episode_len_mean: 151.92
  episode_media: {}
  episode_reward_max: 40.440089683957524
  episode_reward_mean: -81.9289299166383
  episode_reward_min: -252.91169682038887
  episodes_this_iter: 23
  episodes_total: 580
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 9.765625145519152e-05
          cur_lr: 4.999999873689376e-05
          entropy: 1.1848716735839844
          entropy_coeff: 0.0
          kl: 0.0008314625010825694
          model: {}
          policy_loss: 0.004124657716602087
          total_loss: 9.045998573303223
          vf_explained_var: -0.02

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,16,254.151,64000,-81.9289,40.4401,-252.912,151.92


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,16,254.151,64000,-81.9289,40.4401,-252.912,151.92


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,16,254.151,64000,-81.9289,40.4401,-252.912,151.92


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 68000
  counters:
    num_agent_steps_sampled: 68000
    num_agent_steps_trained: 68000
    num_env_steps_sampled: 68000
    num_env_steps_trained: 68000
  custom_metrics: {}
  date: 2022-07-23_21-13-53
  done: false
  episode_len_mean: 164.42
  episode_media: {}
  episode_reward_max: 40.440089683957524
  episode_reward_mean: -77.46126799374711
  episode_reward_min: -268.71238748826147
  episodes_this_iter: 23
  episodes_total: 603
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 4.882812572759576e-05
          cur_lr: 4.999999873689376e-05
          entropy: 1.220192551612854
          entropy_coeff: 0.0
          kl: 0.00718785310164094
          model: {}
          policy_loss: 0.0023110201582312584
          total_loss: 9.275885581970215
          vf_explained_var: -0.035

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,17,269.615,68000,-77.4613,40.4401,-268.712,164.42


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,17,269.615,68000,-77.4613,40.4401,-268.712,164.42


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,17,269.615,68000,-77.4613,40.4401,-268.712,164.42


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 72000
  counters:
    num_agent_steps_sampled: 72000
    num_agent_steps_trained: 72000
    num_env_steps_sampled: 72000
    num_env_steps_trained: 72000
  custom_metrics: {}
  date: 2022-07-23_21-14-09
  done: false
  episode_len_mean: 177.99
  episode_media: {}
  episode_reward_max: 39.564281730071286
  episode_reward_mean: -72.68689365109371
  episode_reward_min: -268.71238748826147
  episodes_this_iter: 21
  episodes_total: 624
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 4.882812572759576e-05
          cur_lr: 4.999999873689376e-05
          entropy: 1.1890348196029663
          entropy_coeff: 0.0
          kl: 0.004810071550309658
          model: {}
          policy_loss: -0.009186782874166965
          total_loss: 9.091049194335938
          vf_explained_var: -0.0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,18,286.322,72000,-72.6869,39.5643,-268.712,177.99


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,18,286.322,72000,-72.6869,39.5643,-268.712,177.99


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,18,286.322,72000,-72.6869,39.5643,-268.712,177.99


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 76000
  counters:
    num_agent_steps_sampled: 76000
    num_agent_steps_trained: 76000
    num_env_steps_sampled: 76000
    num_env_steps_trained: 76000
  custom_metrics: {}
  date: 2022-07-23_21-14-28
  done: false
  episode_len_mean: 192.94
  episode_media: {}
  episode_reward_max: 39.564281730071286
  episode_reward_mean: -69.46717158271139
  episode_reward_min: -268.71238748826147
  episodes_this_iter: 11
  episodes_total: 635
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.441406286379788e-05
          cur_lr: 4.999999873689376e-05
          entropy: 1.215390920639038
          entropy_coeff: 0.0
          kl: 0.0006293823244050145
          model: {}
          policy_loss: 0.0025614674668759108
          total_loss: 9.017485618591309
          vf_explained_var: 0.01

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,19,304.939,76000,-69.4672,39.5643,-268.712,192.94


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,19,304.939,76000,-69.4672,39.5643,-268.712,192.94


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,19,304.939,76000,-69.4672,39.5643,-268.712,192.94


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 80000
  counters:
    num_agent_steps_sampled: 80000
    num_agent_steps_trained: 80000
    num_env_steps_sampled: 80000
    num_env_steps_trained: 80000
  custom_metrics: {}
  date: 2022-07-23_21-14-45
  done: false
  episode_len_mean: 198.76
  episode_media: {}
  episode_reward_max: 39.564281730071286
  episode_reward_mean: -61.034674355484974
  episode_reward_min: -268.71238748826147
  episodes_this_iter: 19
  episodes_total: 654
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.220703143189894e-05
          cur_lr: 4.999999873689376e-05
          entropy: 1.1563150882720947
          entropy_coeff: 0.0
          kl: 0.00502745620906353
          model: {}
          policy_loss: -0.003206182038411498
          total_loss: 9.156157493591309
          vf_explained_var: -0.0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,20,321.466,80000,-61.0347,39.5643,-268.712,198.76


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,20,321.466,80000,-61.0347,39.5643,-268.712,198.76


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,20,321.466,80000,-61.0347,39.5643,-268.712,198.76


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 84000
  counters:
    num_agent_steps_sampled: 84000
    num_agent_steps_trained: 84000
    num_env_steps_sampled: 84000
    num_env_steps_trained: 84000
  custom_metrics: {}
  date: 2022-07-23_21-15-03
  done: false
  episode_len_mean: 217.52
  episode_media: {}
  episode_reward_max: 39.564281730071286
  episode_reward_mean: -52.14859991690179
  episode_reward_min: -268.71238748826147
  episodes_this_iter: 14
  episodes_total: 668
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.220703143189894e-05
          cur_lr: 4.999999873689376e-05
          entropy: 1.1788264513015747
          entropy_coeff: 0.0
          kl: 0.0030874141957610846
          model: {}
          policy_loss: 0.0032767681404948235
          total_loss: 9.124924659729004
          vf_explained_var: -0.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,21,339.682,84000,-52.1486,39.5643,-268.712,217.52


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,21,339.682,84000,-52.1486,39.5643,-268.712,217.52


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,21,339.682,84000,-52.1486,39.5643,-268.712,217.52


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 88000
  counters:
    num_agent_steps_sampled: 88000
    num_agent_steps_trained: 88000
    num_env_steps_sampled: 88000
    num_env_steps_trained: 88000
  custom_metrics: {}
  date: 2022-07-23_21-15-22
  done: false
  episode_len_mean: 238.84
  episode_media: {}
  episode_reward_max: 37.11047501488392
  episode_reward_mean: -49.41335415331939
  episode_reward_min: -268.71238748826147
  episodes_this_iter: 13
  episodes_total: 681
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 6.10351571594947e-06
          cur_lr: 4.999999873689376e-05
          entropy: 1.2169028520584106
          entropy_coeff: 0.0
          kl: 0.0021893938537687063
          model: {}
          policy_loss: -0.019786037504673004
          total_loss: 9.33321762084961
          vf_explained_var: -0.017

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,22,358.577,88000,-49.4134,37.1105,-268.712,238.84


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,22,358.577,88000,-49.4134,37.1105,-268.712,238.84


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,22,358.577,88000,-49.4134,37.1105,-268.712,238.84


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 92000
  counters:
    num_agent_steps_sampled: 92000
    num_agent_steps_trained: 92000
    num_env_steps_sampled: 92000
    num_env_steps_trained: 92000
  custom_metrics: {}
  date: 2022-07-23_21-15-39
  done: false
  episode_len_mean: 246.61
  episode_media: {}
  episode_reward_max: 59.965601205996165
  episode_reward_mean: -36.3006294157321
  episode_reward_min: -224.29278862719036
  episodes_this_iter: 17
  episodes_total: 698
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 3.051757857974735e-06
          cur_lr: 4.999999873689376e-05
          entropy: 1.1710922718048096
          entropy_coeff: 0.0
          kl: 0.0014524746220558882
          model: {}
          policy_loss: -0.0013264245353639126
          total_loss: 9.183445930480957
          vf_explained_var: -0.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,23,375.174,92000,-36.3006,59.9656,-224.293,246.61


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,23,375.174,92000,-36.3006,59.9656,-224.293,246.61


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,23,375.174,92000,-36.3006,59.9656,-224.293,246.61


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 96000
  counters:
    num_agent_steps_sampled: 96000
    num_agent_steps_trained: 96000
    num_env_steps_sampled: 96000
    num_env_steps_trained: 96000
  custom_metrics: {}
  date: 2022-07-23_21-15-57
  done: false
  episode_len_mean: 256.05
  episode_media: {}
  episode_reward_max: 59.965601205996165
  episode_reward_mean: -32.86056699004329
  episode_reward_min: -224.29278862719036
  episodes_this_iter: 9
  episodes_total: 707
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.5258789289873675e-06
          cur_lr: 4.999999873689376e-05
          entropy: 1.2310134172439575
          entropy_coeff: 0.0
          kl: 0.0014493003254756331
          model: {}
          policy_loss: 0.0009533846168778837
          total_loss: 8.753918647766113
          vf_explained_var: 0.0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,24,393.473,96000,-32.8606,59.9656,-224.293,256.05


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,24,393.473,96000,-32.8606,59.9656,-224.293,256.05


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,24,393.473,96000,-32.8606,59.9656,-224.293,256.05


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 100000
  counters:
    num_agent_steps_sampled: 100000
    num_agent_steps_trained: 100000
    num_env_steps_sampled: 100000
    num_env_steps_trained: 100000
  custom_metrics: {}
  date: 2022-07-23_21-16-14
  done: false
  episode_len_mean: 273.41
  episode_media: {}
  episode_reward_max: 71.8223251864785
  episode_reward_mean: -30.620637720584224
  episode_reward_min: -224.29278862719036
  episodes_this_iter: 20
  episodes_total: 727
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 7.629394644936838e-07
          cur_lr: 4.999999873689376e-05
          entropy: 1.1479332447052002
          entropy_coeff: 0.0
          kl: 0.005687843542546034
          model: {}
          policy_loss: -0.0017730345716699958
          total_loss: 9.115547180175781
          vf_explained_var:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,25,410.453,100000,-30.6206,71.8223,-224.293,273.41


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,25,410.453,100000,-30.6206,71.8223,-224.293,273.41


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,25,410.453,100000,-30.6206,71.8223,-224.293,273.41


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 104000
  counters:
    num_agent_steps_sampled: 104000
    num_agent_steps_trained: 104000
    num_env_steps_sampled: 104000
    num_env_steps_trained: 104000
  custom_metrics: {}
  date: 2022-07-23_21-16-32
  done: false
  episode_len_mean: 283.99
  episode_media: {}
  episode_reward_max: 71.8223251864785
  episode_reward_mean: -27.33301626879087
  episode_reward_min: -194.50068356799747
  episodes_this_iter: 9
  episodes_total: 736
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 7.629394644936838e-07
          cur_lr: 4.999999873689376e-05
          entropy: 1.190750002861023
          entropy_coeff: 0.0
          kl: 0.0016304133459925652
          model: {}
          policy_loss: -0.011156494729220867
          total_loss: 9.307275772094727
          vf_explained_var: 0.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,26,428.506,104000,-27.333,71.8223,-194.501,283.99


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,26,428.506,104000,-27.333,71.8223,-194.501,283.99


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,26,428.506,104000,-27.333,71.8223,-194.501,283.99


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 108000
  counters:
    num_agent_steps_sampled: 108000
    num_agent_steps_trained: 108000
    num_env_steps_sampled: 108000
    num_env_steps_trained: 108000
  custom_metrics: {}
  date: 2022-07-23_21-16-51
  done: false
  episode_len_mean: 300.79
  episode_media: {}
  episode_reward_max: 71.8223251864785
  episode_reward_mean: -27.64251874156124
  episode_reward_min: -210.62188438252508
  episodes_this_iter: 7
  episodes_total: 743
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 3.814697322468419e-07
          cur_lr: 4.999999873689376e-05
          entropy: 1.2189821004867554
          entropy_coeff: 0.0
          kl: 0.0006100864848122001
          model: {}
          policy_loss: -0.002176144393160939
          total_loss: 9.047746658325195
          vf_explained_var: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,27,446.899,108000,-27.6425,71.8223,-210.622,300.79


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,27,446.899,108000,-27.6425,71.8223,-210.622,300.79


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,27,446.899,108000,-27.6425,71.8223,-210.622,300.79


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 112000
  counters:
    num_agent_steps_sampled: 112000
    num_agent_steps_trained: 112000
    num_env_steps_sampled: 112000
    num_env_steps_trained: 112000
  custom_metrics: {}
  date: 2022-07-23_21-17-10
  done: false
  episode_len_mean: 324.83
  episode_media: {}
  episode_reward_max: 71.8223251864785
  episode_reward_mean: -27.82938198816527
  episode_reward_min: -210.62188438252508
  episodes_this_iter: 11
  episodes_total: 754
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.9073486612342094e-07
          cur_lr: 4.999999873689376e-05
          entropy: 1.1988931894302368
          entropy_coeff: 0.0
          kl: 0.00044086281559430063
          model: {}
          policy_loss: 0.003149833995848894
          total_loss: 9.08513069152832
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,28,465.78,112000,-27.8294,71.8223,-210.622,324.83


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,28,465.78,112000,-27.8294,71.8223,-210.622,324.83


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,28,465.78,112000,-27.8294,71.8223,-210.622,324.83


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 116000
  counters:
    num_agent_steps_sampled: 116000
    num_agent_steps_trained: 116000
    num_env_steps_sampled: 116000
    num_env_steps_trained: 116000
  custom_metrics: {}
  date: 2022-07-23_21-17-28
  done: false
  episode_len_mean: 343.82
  episode_media: {}
  episode_reward_max: 71.8223251864785
  episode_reward_mean: -26.76692088484086
  episode_reward_min: -210.62188438252508
  episodes_this_iter: 6
  episodes_total: 760
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 9.536743306171047e-08
          cur_lr: 4.999999873689376e-05
          entropy: 1.2178679704666138
          entropy_coeff: 0.0
          kl: 0.00208766246214509
          model: {}
          policy_loss: -0.006753486581146717
          total_loss: 8.943523406982422
          vf_explained_var: 0.0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,29,484.218,116000,-26.7669,71.8223,-210.622,343.82


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,29,484.218,116000,-26.7669,71.8223,-210.622,343.82


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,29,484.218,116000,-26.7669,71.8223,-210.622,343.82


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 120000
  counters:
    num_agent_steps_sampled: 120000
    num_agent_steps_trained: 120000
    num_env_steps_sampled: 120000
    num_env_steps_trained: 120000
  custom_metrics: {}
  date: 2022-07-23_21-17-47
  done: false
  episode_len_mean: 340.37
  episode_media: {}
  episode_reward_max: 71.8223251864785
  episode_reward_mean: -22.745447687336835
  episode_reward_min: -210.62188438252508
  episodes_this_iter: 11
  episodes_total: 771
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 4.7683716530855236e-08
          cur_lr: 4.999999873689376e-05
          entropy: 1.1641252040863037
          entropy_coeff: 0.0
          kl: 0.0005876515642739832
          model: {}
          policy_loss: -0.004694963339716196
          total_loss: 9.120759010314941
          vf_explained_var

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,30,503.022,120000,-22.7454,71.8223,-210.622,340.37


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,30,503.022,120000,-22.7454,71.8223,-210.622,340.37


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,30,503.022,120000,-22.7454,71.8223,-210.622,340.37


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 124000
  counters:
    num_agent_steps_sampled: 124000
    num_agent_steps_trained: 124000
    num_env_steps_sampled: 124000
    num_env_steps_trained: 124000
  custom_metrics: {}
  date: 2022-07-23_21-18-05
  done: false
  episode_len_mean: 356.0
  episode_media: {}
  episode_reward_max: 71.8223251864785
  episode_reward_mean: -17.850326416463687
  episode_reward_min: -210.62188438252508
  episodes_this_iter: 9
  episodes_total: 780
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.3841858265427618e-08
          cur_lr: 4.999999873689376e-05
          entropy: 1.1815687417984009
          entropy_coeff: 0.0
          kl: 0.0016480395570397377
          model: {}
          policy_loss: -0.005918070208281279
          total_loss: 8.903120994567871
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,31,521.372,124000,-17.8503,71.8223,-210.622,356


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,31,521.372,124000,-17.8503,71.8223,-210.622,356


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,31,521.372,124000,-17.8503,71.8223,-210.622,356


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 128000
  counters:
    num_agent_steps_sampled: 128000
    num_agent_steps_trained: 128000
    num_env_steps_sampled: 128000
    num_env_steps_trained: 128000
  custom_metrics: {}
  date: 2022-07-23_21-18-23
  done: false
  episode_len_mean: 375.41
  episode_media: {}
  episode_reward_max: 71.8223251864785
  episode_reward_mean: -16.63241928686232
  episode_reward_min: -210.62188438252508
  episodes_this_iter: 7
  episodes_total: 787
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.1920929132713809e-08
          cur_lr: 4.999999873689376e-05
          entropy: 1.2084932327270508
          entropy_coeff: 0.0
          kl: 0.002675185911357403
          model: {}
          policy_loss: -0.008860700763761997
          total_loss: 9.036487579345703
          vf_explained_var: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,32,538.979,128000,-16.6324,71.8223,-210.622,375.41


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,32,538.979,128000,-16.6324,71.8223,-210.622,375.41


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,32,538.979,128000,-16.6324,71.8223,-210.622,375.41


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 132000
  counters:
    num_agent_steps_sampled: 132000
    num_agent_steps_trained: 132000
    num_env_steps_sampled: 132000
    num_env_steps_trained: 132000
  custom_metrics: {}
  date: 2022-07-23_21-18-40
  done: false
  episode_len_mean: 401.56
  episode_media: {}
  episode_reward_max: 71.8223251864785
  episode_reward_mean: -23.527556554339597
  episode_reward_min: -225.88751068800588
  episodes_this_iter: 11
  episodes_total: 798
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 5.9604645663569045e-09
          cur_lr: 4.999999873689376e-05
          entropy: 1.1752960681915283
          entropy_coeff: 0.0
          kl: 0.0008051434415392578
          model: {}
          policy_loss: 0.002052492229267955
          total_loss: 9.006556510925293
          vf_explained_var:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,33,556.155,132000,-23.5276,71.8223,-225.888,401.56


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,33,556.155,132000,-23.5276,71.8223,-225.888,401.56


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,33,556.155,132000,-23.5276,71.8223,-225.888,401.56


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 136000
  counters:
    num_agent_steps_sampled: 136000
    num_agent_steps_trained: 136000
    num_env_steps_sampled: 136000
    num_env_steps_trained: 136000
  custom_metrics: {}
  date: 2022-07-23_21-18-58
  done: false
  episode_len_mean: 405.28
  episode_media: {}
  episode_reward_max: 71.8223251864785
  episode_reward_mean: -24.181434690537653
  episode_reward_min: -225.88751068800588
  episodes_this_iter: 8
  episodes_total: 806
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.9802322831784522e-09
          cur_lr: 4.999999873689376e-05
          entropy: 1.197104811668396
          entropy_coeff: 0.0
          kl: 0.0006379132391884923
          model: {}
          policy_loss: 0.00018485156761016697
          total_loss: 9.073084831237793
          vf_explained_var:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,34,573.626,136000,-24.1814,71.8223,-225.888,405.28


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,34,573.626,136000,-24.1814,71.8223,-225.888,405.28


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,34,573.626,136000,-24.1814,71.8223,-225.888,405.28


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 140000
  counters:
    num_agent_steps_sampled: 140000
    num_agent_steps_trained: 140000
    num_env_steps_sampled: 140000
    num_env_steps_trained: 140000
  custom_metrics: {}
  date: 2022-07-23_21-19-17
  done: false
  episode_len_mean: 424.03
  episode_media: {}
  episode_reward_max: 91.80194528794858
  episode_reward_mean: -20.900264187502152
  episode_reward_min: -225.88751068800588
  episodes_this_iter: 6
  episodes_total: 812
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.4901161415892261e-09
          cur_lr: 4.999999873689376e-05
          entropy: 1.2446908950805664
          entropy_coeff: 0.0
          kl: 0.0011422124225646257
          model: {}
          policy_loss: -0.0038336331490427256
          total_loss: 8.793726921081543
          vf_explained_va

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,35,592.752,140000,-20.9003,91.8019,-225.888,424.03


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,35,592.752,140000,-20.9003,91.8019,-225.888,424.03


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,35,592.752,140000,-20.9003,91.8019,-225.888,424.03


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 144000
  counters:
    num_agent_steps_sampled: 144000
    num_agent_steps_trained: 144000
    num_env_steps_sampled: 144000
    num_env_steps_trained: 144000
  custom_metrics: {}
  date: 2022-07-23_21-19-35
  done: false
  episode_len_mean: 444.05
  episode_media: {}
  episode_reward_max: 91.80194528794858
  episode_reward_mean: -18.911884313614546
  episode_reward_min: -225.88751068800588
  episodes_this_iter: 10
  episodes_total: 822
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 7.450580707946131e-10
          cur_lr: 4.999999873689376e-05
          entropy: 1.1781376600265503
          entropy_coeff: 0.0
          kl: 0.003410374978557229
          model: {}
          policy_loss: -0.0013150143204256892
          total_loss: 9.097816467285156
          vf_explained_var

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,36,610.97,144000,-18.9119,91.8019,-225.888,444.05


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,36,610.97,144000,-18.9119,91.8019,-225.888,444.05


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,36,610.97,144000,-18.9119,91.8019,-225.888,444.05


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 148000
  counters:
    num_agent_steps_sampled: 148000
    num_agent_steps_trained: 148000
    num_env_steps_sampled: 148000
    num_env_steps_trained: 148000
  custom_metrics: {}
  date: 2022-07-23_21-19-54
  done: false
  episode_len_mean: 472.53
  episode_media: {}
  episode_reward_max: 91.80194528794858
  episode_reward_mean: -17.23432638827097
  episode_reward_min: -225.88751068800588
  episodes_this_iter: 4
  episodes_total: 826
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 3.7252903539730653e-10
          cur_lr: 4.999999873689376e-05
          entropy: 1.236035943031311
          entropy_coeff: 0.0
          kl: 0.0026648433413356543
          model: {}
          policy_loss: -0.006845870986580849
          total_loss: 8.875152587890625
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,37,629.625,148000,-17.2343,91.8019,-225.888,472.53


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,37,629.625,148000,-17.2343,91.8019,-225.888,472.53


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,37,629.625,148000,-17.2343,91.8019,-225.888,472.53


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 152000
  counters:
    num_agent_steps_sampled: 152000
    num_agent_steps_trained: 152000
    num_env_steps_sampled: 152000
    num_env_steps_trained: 152000
  custom_metrics: {}
  date: 2022-07-23_21-20-13
  done: false
  episode_len_mean: 488.46
  episode_media: {}
  episode_reward_max: 91.80194528794858
  episode_reward_mean: -15.466722341452726
  episode_reward_min: -225.88751068800588
  episodes_this_iter: 5
  episodes_total: 831
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.8626451769865326e-10
          cur_lr: 4.999999873689376e-05
          entropy: 1.2597880363464355
          entropy_coeff: 0.0
          kl: 0.0013713076477870345
          model: {}
          policy_loss: -0.0021316558122634888
          total_loss: 9.06042194366455
          vf_explained_var

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,38,648.913,152000,-15.4667,91.8019,-225.888,488.46


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,38,648.913,152000,-15.4667,91.8019,-225.888,488.46


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,38,648.913,152000,-15.4667,91.8019,-225.888,488.46


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 156000
  counters:
    num_agent_steps_sampled: 156000
    num_agent_steps_trained: 156000
    num_env_steps_sampled: 156000
    num_env_steps_trained: 156000
  custom_metrics: {}
  date: 2022-07-23_21-20-31
  done: false
  episode_len_mean: 504.76
  episode_media: {}
  episode_reward_max: 91.80194528794858
  episode_reward_mean: -13.136174325416798
  episode_reward_min: -225.88751068800588
  episodes_this_iter: 8
  episodes_total: 839
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 9.313225884932663e-11
          cur_lr: 4.999999873689376e-05
          entropy: 1.2265633344650269
          entropy_coeff: 0.0
          kl: 0.0033297317568212748
          model: {}
          policy_loss: -0.0003786309971474111
          total_loss: 9.106474876403809
          vf_explained_var

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,39,666.34,156000,-13.1362,91.8019,-225.888,504.76


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,39,666.34,156000,-13.1362,91.8019,-225.888,504.76


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,39,666.34,156000,-13.1362,91.8019,-225.888,504.76


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 160000
  counters:
    num_agent_steps_sampled: 160000
    num_agent_steps_trained: 160000
    num_env_steps_sampled: 160000
    num_env_steps_trained: 160000
  custom_metrics: {}
  date: 2022-07-23_21-20-49
  done: false
  episode_len_mean: 499.08
  episode_media: {}
  episode_reward_max: 91.80194528794858
  episode_reward_mean: -9.180921373515233
  episode_reward_min: -225.88751068800588
  episodes_this_iter: 9
  episodes_total: 848
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 4.6566129424663316e-11
          cur_lr: 4.999999873689376e-05
          entropy: 1.1892279386520386
          entropy_coeff: 0.0
          kl: 0.0015808320604264736
          model: {}
          policy_loss: -0.0021511728409677744
          total_loss: 9.068413734436035
          vf_explained_var

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,40,684.599,160000,-9.18092,91.8019,-225.888,499.08


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,40,684.599,160000,-9.18092,91.8019,-225.888,499.08


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,40,684.599,160000,-9.18092,91.8019,-225.888,499.08


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 164000
  counters:
    num_agent_steps_sampled: 164000
    num_agent_steps_trained: 164000
    num_env_steps_sampled: 164000
    num_env_steps_trained: 164000
  custom_metrics: {}
  date: 2022-07-23_21-21-09
  done: false
  episode_len_mean: 514.4
  episode_media: {}
  episode_reward_max: 91.80194528794858
  episode_reward_mean: -9.695602084981395
  episode_reward_min: -225.88751068800588
  episodes_this_iter: 6
  episodes_total: 854
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.3283064712331658e-11
          cur_lr: 4.999999873689376e-05
          entropy: 1.2263067960739136
          entropy_coeff: 0.0
          kl: 0.0012763170525431633
          model: {}
          policy_loss: -0.0021441932767629623
          total_loss: 8.891088485717773
          vf_explained_var:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,41,704.107,164000,-9.6956,91.8019,-225.888,514.4


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,41,704.107,164000,-9.6956,91.8019,-225.888,514.4


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,41,704.107,164000,-9.6956,91.8019,-225.888,514.4


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 168000
  counters:
    num_agent_steps_sampled: 168000
    num_agent_steps_trained: 168000
    num_env_steps_sampled: 168000
    num_env_steps_trained: 168000
  custom_metrics: {}
  date: 2022-07-23_21-21-27
  done: false
  episode_len_mean: 520.51
  episode_media: {}
  episode_reward_max: 91.80194528794858
  episode_reward_mean: -12.771169813460967
  episode_reward_min: -225.88751068800588
  episodes_this_iter: 6
  episodes_total: 860
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.1641532356165829e-11
          cur_lr: 4.999999873689376e-05
          entropy: 1.2021095752716064
          entropy_coeff: 0.0
          kl: 0.0020082611590623856
          model: {}
          policy_loss: -0.0014423339162021875
          total_loss: 9.20779800415039
          vf_explained_var

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,42,722.489,168000,-12.7712,91.8019,-225.888,520.51


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,42,722.489,168000,-12.7712,91.8019,-225.888,520.51


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,42,722.489,168000,-12.7712,91.8019,-225.888,520.51


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 172000
  counters:
    num_agent_steps_sampled: 172000
    num_agent_steps_trained: 172000
    num_env_steps_sampled: 172000
    num_env_steps_trained: 172000
  custom_metrics: {}
  date: 2022-07-23_21-21-45
  done: false
  episode_len_mean: 548.25
  episode_media: {}
  episode_reward_max: 103.66292073084935
  episode_reward_mean: -11.614815160530734
  episode_reward_min: -225.88751068800588
  episodes_this_iter: 9
  episodes_total: 869
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 5.8207661780829145e-12
          cur_lr: 4.999999873689376e-05
          entropy: 1.2034820318222046
          entropy_coeff: 0.0
          kl: 0.0015770412283018231
          model: {}
          policy_loss: -0.0013647516025230289
          total_loss: 9.072129249572754
          vf_explained_v

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,43,739.922,172000,-11.6148,103.663,-225.888,548.25


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,43,739.922,172000,-11.6148,103.663,-225.888,548.25


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,43,739.922,172000,-11.6148,103.663,-225.888,548.25


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 176000
  counters:
    num_agent_steps_sampled: 176000
    num_agent_steps_trained: 176000
    num_env_steps_sampled: 176000
    num_env_steps_trained: 176000
  custom_metrics: {}
  date: 2022-07-23_21-22-01
  done: false
  episode_len_mean: 522.04
  episode_media: {}
  episode_reward_max: 103.66292073084935
  episode_reward_mean: -10.997773717825394
  episode_reward_min: -225.88751068800588
  episodes_this_iter: 11
  episodes_total: 880
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.9103830890414573e-12
          cur_lr: 4.999999873689376e-05
          entropy: 1.1600096225738525
          entropy_coeff: 0.0
          kl: 0.0008133198134601116
          model: {}
          policy_loss: -0.0036678158212453127
          total_loss: 9.260270118713379
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,44,756.598,176000,-10.9978,103.663,-225.888,522.04


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,44,756.598,176000,-10.9978,103.663,-225.888,522.04


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,44,756.598,176000,-10.9978,103.663,-225.888,522.04


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 180000
  counters:
    num_agent_steps_sampled: 180000
    num_agent_steps_trained: 180000
    num_env_steps_sampled: 180000
    num_env_steps_trained: 180000
  custom_metrics: {}
  date: 2022-07-23_21-22-20
  done: false
  episode_len_mean: 514.86
  episode_media: {}
  episode_reward_max: 171.990563750072
  episode_reward_mean: -9.224167947824832
  episode_reward_min: -225.88751068800588
  episodes_this_iter: 7
  episodes_total: 887
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.4551915445207286e-12
          cur_lr: 4.999999873689376e-05
          entropy: 1.1841884851455688
          entropy_coeff: 0.0
          kl: 0.0028668695595115423
          model: {}
          policy_loss: -0.007841464132070541
          total_loss: 8.812925338745117
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,45,774.704,180000,-9.22417,171.991,-225.888,514.86


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,45,774.704,180000,-9.22417,171.991,-225.888,514.86


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,45,774.704,180000,-9.22417,171.991,-225.888,514.86


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 184000
  counters:
    num_agent_steps_sampled: 184000
    num_agent_steps_trained: 184000
    num_env_steps_sampled: 184000
    num_env_steps_trained: 184000
  custom_metrics: {}
  date: 2022-07-23_21-22-38
  done: false
  episode_len_mean: 515.68
  episode_media: {}
  episode_reward_max: 171.990563750072
  episode_reward_mean: -4.886872587565286
  episode_reward_min: -209.1934063444032
  episodes_this_iter: 8
  episodes_total: 895
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 7.275957722603643e-13
          cur_lr: 4.999999873689376e-05
          entropy: 1.210113286972046
          entropy_coeff: 0.0
          kl: 0.0015062983147799969
          model: {}
          policy_loss: -0.0065412819385528564
          total_loss: 9.165597915649414
          vf_explained_var: 0.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,46,792.85,184000,-4.88687,171.991,-209.193,515.68


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,46,792.85,184000,-4.88687,171.991,-209.193,515.68


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,46,792.85,184000,-4.88687,171.991,-209.193,515.68


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 188000
  counters:
    num_agent_steps_sampled: 188000
    num_agent_steps_trained: 188000
    num_env_steps_sampled: 188000
    num_env_steps_trained: 188000
  custom_metrics: {}
  date: 2022-07-23_21-22-55
  done: false
  episode_len_mean: 517.73
  episode_media: {}
  episode_reward_max: 171.990563750072
  episode_reward_mean: 1.7197023027418334
  episode_reward_min: -201.04153577917077
  episodes_this_iter: 10
  episodes_total: 905
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 3.6379788613018216e-13
          cur_lr: 4.999999873689376e-05
          entropy: 1.1770676374435425
          entropy_coeff: 0.0
          kl: 0.0016462161438539624
          model: {}
          policy_loss: -0.005583952646702528
          total_loss: 9.01079273223877
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,47,810.378,188000,1.7197,171.991,-201.042,517.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,47,810.378,188000,1.7197,171.991,-201.042,517.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,47,810.378,188000,1.7197,171.991,-201.042,517.73


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 192000
  counters:
    num_agent_steps_sampled: 192000
    num_agent_steps_trained: 192000
    num_env_steps_sampled: 192000
    num_env_steps_trained: 192000
  custom_metrics: {}
  date: 2022-07-23_21-23-13
  done: false
  episode_len_mean: 508.92
  episode_media: {}
  episode_reward_max: 171.990563750072
  episode_reward_mean: -0.493721735303713
  episode_reward_min: -208.8469327713657
  episodes_this_iter: 11
  episodes_total: 916
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.8189894306509108e-13
          cur_lr: 4.999999873689376e-05
          entropy: 1.1083292961120605
          entropy_coeff: 0.0
          kl: 0.002355261007323861
          model: {}
          policy_loss: -0.005281675606966019
          total_loss: 9.068474769592285
          vf_explained_var: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,48,828.261,192000,-0.493722,171.991,-208.847,508.92


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,48,828.261,192000,-0.493722,171.991,-208.847,508.92


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,48,828.261,192000,-0.493722,171.991,-208.847,508.92


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 196000
  counters:
    num_agent_steps_sampled: 196000
    num_agent_steps_trained: 196000
    num_env_steps_sampled: 196000
    num_env_steps_trained: 196000
  custom_metrics: {}
  date: 2022-07-23_21-23-32
  done: false
  episode_len_mean: 523.31
  episode_media: {}
  episode_reward_max: 171.990563750072
  episode_reward_mean: 3.3353002950627575
  episode_reward_min: -208.8469327713657
  episodes_this_iter: 5
  episodes_total: 921
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 9.094947153254554e-14
          cur_lr: 4.999999873689376e-05
          entropy: 1.2423670291900635
          entropy_coeff: 0.0
          kl: 0.0011389596620574594
          model: {}
          policy_loss: -0.0016113942256197333
          total_loss: 8.802534103393555
          vf_explained_var: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,49,847.18,196000,3.3353,171.991,-208.847,523.31


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,49,847.18,196000,3.3353,171.991,-208.847,523.31


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,49,847.18,196000,3.3353,171.991,-208.847,523.31


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 200000
  counters:
    num_agent_steps_sampled: 200000
    num_agent_steps_trained: 200000
    num_env_steps_sampled: 200000
    num_env_steps_trained: 200000
  custom_metrics: {}
  date: 2022-07-23_21-23-52
  done: false
  episode_len_mean: 521.66
  episode_media: {}
  episode_reward_max: 171.990563750072
  episode_reward_mean: -0.12881732185735814
  episode_reward_min: -246.75034230845156
  episodes_this_iter: 4
  episodes_total: 925
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 4.547473576627277e-14
          cur_lr: 4.999999873689376e-05
          entropy: 1.2275125980377197
          entropy_coeff: 0.0
          kl: 0.0017111619235947728
          model: {}
          policy_loss: -0.005790517199784517
          total_loss: 8.986483573913574
          vf_explained_var:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,50,866.603,200000,-0.128817,171.991,-246.75,521.66


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,50,866.603,200000,-0.128817,171.991,-246.75,521.66


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,50,866.603,200000,-0.128817,171.991,-246.75,521.66


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 204000
  counters:
    num_agent_steps_sampled: 204000
    num_agent_steps_trained: 204000
    num_env_steps_sampled: 204000
    num_env_steps_trained: 204000
  custom_metrics: {}
  date: 2022-07-23_21-24-11
  done: false
  episode_len_mean: 508.37
  episode_media: {}
  episode_reward_max: 171.990563750072
  episode_reward_mean: 4.127917502280245
  episode_reward_min: -246.75034230845156
  episodes_this_iter: 9
  episodes_total: 934
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.2737367883136385e-14
          cur_lr: 4.999999873689376e-05
          entropy: 1.2190204858779907
          entropy_coeff: 0.0
          kl: 0.0013750243233516812
          model: {}
          policy_loss: 0.0015023719752207398
          total_loss: 8.972589492797852
          vf_explained_var: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,51,885.585,204000,4.12792,171.991,-246.75,508.37


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,51,885.585,204000,4.12792,171.991,-246.75,508.37


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,51,885.585,204000,4.12792,171.991,-246.75,508.37


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 208000
  counters:
    num_agent_steps_sampled: 208000
    num_agent_steps_trained: 208000
    num_env_steps_sampled: 208000
    num_env_steps_trained: 208000
  custom_metrics: {}
  date: 2022-07-23_21-24-30
  done: false
  episode_len_mean: 498.4
  episode_media: {}
  episode_reward_max: 171.990563750072
  episode_reward_mean: 7.076695945147828
  episode_reward_min: -246.75034230845156
  episodes_this_iter: 9
  episodes_total: 943
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.1368683941568192e-14
          cur_lr: 4.999999873689376e-05
          entropy: 1.2116297483444214
          entropy_coeff: 0.0
          kl: 0.0029041776433587074
          model: {}
          policy_loss: -0.0010738254059106112
          total_loss: 9.18506145477295
          vf_explained_var: 0.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,52,904.891,208000,7.0767,171.991,-246.75,498.4


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,52,904.891,208000,7.0767,171.991,-246.75,498.4


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,52,904.891,208000,7.0767,171.991,-246.75,498.4


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,52,904.891,208000,7.0767,171.991,-246.75,498.4


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 212000
  counters:
    num_agent_steps_sampled: 212000
    num_agent_steps_trained: 212000
    num_env_steps_sampled: 212000
    num_env_steps_trained: 212000
  custom_metrics: {}
  date: 2022-07-23_21-24-51
  done: false
  episode_len_mean: 521.82
  episode_media: {}
  episode_reward_max: 171.990563750072
  episode_reward_mean: 8.212887540833563
  episode_reward_min: -246.75034230845156
  episodes_this_iter: 5
  episodes_total: 948
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 5.684341970784096e-15
          cur_lr: 4.999999873689376e-05
          entropy: 1.204121470451355
          entropy_coeff: 0.0
          kl: 0.0023360326886177063
          model: {}
          policy_loss: -0.006471593864262104
          total_loss: 9.146639823913574
          vf_explained_var: 0.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,53,925.579,212000,8.21289,171.991,-246.75,521.82


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,53,925.579,212000,8.21289,171.991,-246.75,521.82


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,53,925.579,212000,8.21289,171.991,-246.75,521.82


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,53,925.579,212000,8.21289,171.991,-246.75,521.82


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 216000
  counters:
    num_agent_steps_sampled: 216000
    num_agent_steps_trained: 216000
    num_env_steps_sampled: 216000
    num_env_steps_trained: 216000
  custom_metrics: {}
  date: 2022-07-23_21-25-11
  done: false
  episode_len_mean: 521.83
  episode_media: {}
  episode_reward_max: 171.990563750072
  episode_reward_mean: 11.081027245860463
  episode_reward_min: -246.75034230845156
  episodes_this_iter: 6
  episodes_total: 954
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.842170985392048e-15
          cur_lr: 4.999999873689376e-05
          entropy: 1.200403094291687
          entropy_coeff: 0.0
          kl: 0.00437043746933341
          model: {}
          policy_loss: -0.004892888478934765
          total_loss: 8.71901798248291
          vf_explained_var: 0.220

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,54,945.955,216000,11.081,171.991,-246.75,521.83


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,54,945.955,216000,11.081,171.991,-246.75,521.83


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,54,945.955,216000,11.081,171.991,-246.75,521.83


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 220000
  counters:
    num_agent_steps_sampled: 220000
    num_agent_steps_trained: 220000
    num_env_steps_sampled: 220000
    num_env_steps_trained: 220000
  custom_metrics: {}
  date: 2022-07-23_21-25-30
  done: false
  episode_len_mean: 516.16
  episode_media: {}
  episode_reward_max: 171.990563750072
  episode_reward_mean: 13.544963892548179
  episode_reward_min: -246.75034230845156
  episodes_this_iter: 7
  episodes_total: 961
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.421085492696024e-15
          cur_lr: 4.999999873689376e-05
          entropy: 1.1737607717514038
          entropy_coeff: 0.0
          kl: 0.0006237474735826254
          model: {}
          policy_loss: -0.004797604866325855
          total_loss: 9.040234565734863
          vf_explained_var: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,55,964.134,220000,13.545,171.991,-246.75,516.16


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,55,964.134,220000,13.545,171.991,-246.75,516.16


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,55,964.134,220000,13.545,171.991,-246.75,516.16


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 224000
  counters:
    num_agent_steps_sampled: 224000
    num_agent_steps_trained: 224000
    num_env_steps_sampled: 224000
    num_env_steps_trained: 224000
  custom_metrics: {}
  date: 2022-07-23_21-25-49
  done: false
  episode_len_mean: 530.68
  episode_media: {}
  episode_reward_max: 171.990563750072
  episode_reward_mean: 13.397675997379995
  episode_reward_min: -246.75034230845156
  episodes_this_iter: 5
  episodes_total: 966
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 7.10542746348012e-16
          cur_lr: 4.999999873689376e-05
          entropy: 1.1972920894622803
          entropy_coeff: 0.0
          kl: 0.000872679112944752
          model: {}
          policy_loss: -0.006181132979691029
          total_loss: 8.819292068481445
          vf_explained_var: 0.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,56,983.436,224000,13.3977,171.991,-246.75,530.68


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,56,983.436,224000,13.3977,171.991,-246.75,530.68


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,56,983.436,224000,13.3977,171.991,-246.75,530.68


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 228000
  counters:
    num_agent_steps_sampled: 228000
    num_agent_steps_trained: 228000
    num_env_steps_sampled: 228000
    num_env_steps_trained: 228000
  custom_metrics: {}
  date: 2022-07-23_21-26-08
  done: false
  episode_len_mean: 546.24
  episode_media: {}
  episode_reward_max: 171.990563750072
  episode_reward_mean: 15.007192378511695
  episode_reward_min: -246.75034230845156
  episodes_this_iter: 4
  episodes_total: 970
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 3.55271373174006e-16
          cur_lr: 4.999999873689376e-05
          entropy: 1.2543498277664185
          entropy_coeff: 0.0
          kl: 0.0015604224754497409
          model: {}
          policy_loss: -0.0044571333564817905
          total_loss: 8.829520225524902
          vf_explained_var: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,57,1002.04,228000,15.0072,171.991,-246.75,546.24


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,57,1002.04,228000,15.0072,171.991,-246.75,546.24


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,57,1002.04,228000,15.0072,171.991,-246.75,546.24


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 232000
  counters:
    num_agent_steps_sampled: 232000
    num_agent_steps_trained: 232000
    num_env_steps_sampled: 232000
    num_env_steps_trained: 232000
  custom_metrics: {}
  date: 2022-07-23_21-26-26
  done: false
  episode_len_mean: 564.0
  episode_media: {}
  episode_reward_max: 171.990563750072
  episode_reward_mean: 13.97559816713641
  episode_reward_min: -246.75034230845156
  episodes_this_iter: 8
  episodes_total: 978
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.77635686587003e-16
          cur_lr: 4.999999873689376e-05
          entropy: 1.2097268104553223
          entropy_coeff: 0.0
          kl: 0.003077570116147399
          model: {}
          policy_loss: -0.00040702667320147157
          total_loss: 8.953238487243652
          vf_explained_var: 0.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,58,1020.81,232000,13.9756,171.991,-246.75,564


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,58,1020.81,232000,13.9756,171.991,-246.75,564


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,58,1020.81,232000,13.9756,171.991,-246.75,564


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 236000
  counters:
    num_agent_steps_sampled: 236000
    num_agent_steps_trained: 236000
    num_env_steps_sampled: 236000
    num_env_steps_trained: 236000
  custom_metrics: {}
  date: 2022-07-23_21-26-45
  done: false
  episode_len_mean: 575.5
  episode_media: {}
  episode_reward_max: 114.50803999619158
  episode_reward_mean: 14.636904421120267
  episode_reward_min: -246.75034230845156
  episodes_this_iter: 7
  episodes_total: 985
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 8.88178432935015e-17
          cur_lr: 4.999999873689376e-05
          entropy: 1.2320643663406372
          entropy_coeff: 0.0
          kl: 0.0016952004516497254
          model: {}
          policy_loss: -0.00409043300896883
          total_loss: 8.899578094482422
          vf_explained_var: 0.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,59,1038.88,236000,14.6369,114.508,-246.75,575.5


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,59,1038.88,236000,14.6369,114.508,-246.75,575.5


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,59,1038.88,236000,14.6369,114.508,-246.75,575.5


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 240000
  counters:
    num_agent_steps_sampled: 240000
    num_agent_steps_trained: 240000
    num_env_steps_sampled: 240000
    num_env_steps_trained: 240000
  custom_metrics: {}
  date: 2022-07-23_21-27-02
  done: false
  episode_len_mean: 584.38
  episode_media: {}
  episode_reward_max: 114.50803999619158
  episode_reward_mean: 16.237775826271683
  episode_reward_min: -246.75034230845156
  episodes_this_iter: 6
  episodes_total: 991
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 4.440892164675075e-17
          cur_lr: 4.999999873689376e-05
          entropy: 1.2665355205535889
          entropy_coeff: 0.0
          kl: 0.0062767937779426575
          model: {}
          policy_loss: -0.005473737604916096
          total_loss: 8.560721397399902
          vf_explained_var:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,60,1056.4,240000,16.2378,114.508,-246.75,584.38


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,60,1056.4,240000,16.2378,114.508,-246.75,584.38


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,60,1056.4,240000,16.2378,114.508,-246.75,584.38


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 244000
  counters:
    num_agent_steps_sampled: 244000
    num_agent_steps_trained: 244000
    num_env_steps_sampled: 244000
    num_env_steps_trained: 244000
  custom_metrics: {}
  date: 2022-07-23_21-27-20
  done: false
  episode_len_mean: 601.54
  episode_media: {}
  episode_reward_max: 165.5136655142816
  episode_reward_mean: 20.299177593209375
  episode_reward_min: -246.75034230845156
  episodes_this_iter: 4
  episodes_total: 995
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 4.440892164675075e-17
          cur_lr: 4.999999873689376e-05
          entropy: 1.1596059799194336
          entropy_coeff: 0.0
          kl: 0.0016240638215094805
          model: {}
          policy_loss: -0.007406509947031736
          total_loss: 8.466790199279785
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,61,1074.18,244000,20.2992,165.514,-246.75,601.54


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,61,1074.18,244000,20.2992,165.514,-246.75,601.54


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,61,1074.18,244000,20.2992,165.514,-246.75,601.54


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 248000
  counters:
    num_agent_steps_sampled: 248000
    num_agent_steps_trained: 248000
    num_env_steps_sampled: 248000
    num_env_steps_trained: 248000
  custom_metrics: {}
  date: 2022-07-23_21-27-38
  done: false
  episode_len_mean: 618.1
  episode_media: {}
  episode_reward_max: 165.5136655142816
  episode_reward_mean: 21.5273987504466
  episode_reward_min: -246.75034230845156
  episodes_this_iter: 4
  episodes_total: 999
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.2204460823375376e-17
          cur_lr: 4.999999873689376e-05
          entropy: 1.227711796760559
          entropy_coeff: 0.0
          kl: 0.0005799869541078806
          model: {}
          policy_loss: -0.002204385818913579
          total_loss: 7.351563453674316
          vf_explained_var: 0.3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,62,1091.64,248000,21.5274,165.514,-246.75,618.1


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,62,1091.64,248000,21.5274,165.514,-246.75,618.1


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,62,1091.64,248000,21.5274,165.514,-246.75,618.1


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 252000
  counters:
    num_agent_steps_sampled: 252000
    num_agent_steps_trained: 252000
    num_env_steps_sampled: 252000
    num_env_steps_trained: 252000
  custom_metrics: {}
  date: 2022-07-23_21-27-56
  done: false
  episode_len_mean: 642.47
  episode_media: {}
  episode_reward_max: 165.5136655142816
  episode_reward_mean: 24.95969885331078
  episode_reward_min: -246.75034230845156
  episodes_this_iter: 4
  episodes_total: 1003
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.1102230411687688e-17
          cur_lr: 4.999999873689376e-05
          entropy: 1.2350239753723145
          entropy_coeff: 0.0
          kl: 0.002988745691254735
          model: {}
          policy_loss: -0.00492419907823205
          total_loss: 7.513585567474365
          vf_explained_var: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,63,1109.71,252000,24.9597,165.514,-246.75,642.47


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,63,1109.71,252000,24.9597,165.514,-246.75,642.47


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,63,1109.71,252000,24.9597,165.514,-246.75,642.47


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 256000
  counters:
    num_agent_steps_sampled: 256000
    num_agent_steps_trained: 256000
    num_env_steps_sampled: 256000
    num_env_steps_trained: 256000
  custom_metrics: {}
  date: 2022-07-23_21-28-14
  done: false
  episode_len_mean: 667.84
  episode_media: {}
  episode_reward_max: 165.5136655142816
  episode_reward_mean: 26.92245730979822
  episode_reward_min: -246.75034230845156
  episodes_this_iter: 5
  episodes_total: 1008
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 5.551115205843844e-18
          cur_lr: 4.999999873689376e-05
          entropy: 1.2021108865737915
          entropy_coeff: 0.0
          kl: 0.00041635779780335724
          model: {}
          policy_loss: -0.00858867634087801
          total_loss: 6.950072765350342
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,64,1127.58,256000,26.9225,165.514,-246.75,667.84


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,64,1127.58,256000,26.9225,165.514,-246.75,667.84


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,64,1127.58,256000,26.9225,165.514,-246.75,667.84


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 260000
  counters:
    num_agent_steps_sampled: 260000
    num_agent_steps_trained: 260000
    num_env_steps_sampled: 260000
    num_env_steps_trained: 260000
  custom_metrics: {}
  date: 2022-07-23_21-28-30
  done: false
  episode_len_mean: 606.54
  episode_media: {}
  episode_reward_max: 165.5136655142816
  episode_reward_mean: 33.59922522904377
  episode_reward_min: -189.84152090910777
  episodes_this_iter: 17
  episodes_total: 1025
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.775557602921922e-18
          cur_lr: 4.999999873689376e-05
          entropy: 1.0536447763442993
          entropy_coeff: 0.0
          kl: 0.0022652652114629745
          model: {}
          policy_loss: -0.004936058074235916
          total_loss: 8.445894241333008
          vf_explained_var:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,65,1143.63,260000,33.5992,165.514,-189.842,606.54


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,65,1143.63,260000,33.5992,165.514,-189.842,606.54


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,65,1143.63,260000,33.5992,165.514,-189.842,606.54


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 264000
  counters:
    num_agent_steps_sampled: 264000
    num_agent_steps_trained: 264000
    num_env_steps_sampled: 264000
    num_env_steps_trained: 264000
  custom_metrics: {}
  date: 2022-07-23_21-28-47
  done: false
  episode_len_mean: 611.24
  episode_media: {}
  episode_reward_max: 176.85180935319576
  episode_reward_mean: 36.88018057703728
  episode_reward_min: -189.84152090910777
  episodes_this_iter: 5
  episodes_total: 1030
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.387778801460961e-18
          cur_lr: 4.999999873689376e-05
          entropy: 1.16560697555542
          entropy_coeff: 0.0
          kl: 0.0026541310362517834
          model: {}
          policy_loss: -0.008040983229875565
          total_loss: 7.4260454177856445
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,66,1160.69,264000,36.8802,176.852,-189.842,611.24


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,66,1160.69,264000,36.8802,176.852,-189.842,611.24


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,66,1160.69,264000,36.8802,176.852,-189.842,611.24


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 268000
  counters:
    num_agent_steps_sampled: 268000
    num_agent_steps_trained: 268000
    num_env_steps_sampled: 268000
    num_env_steps_trained: 268000
  custom_metrics: {}
  date: 2022-07-23_21-29-03
  done: false
  episode_len_mean: 619.33
  episode_media: {}
  episode_reward_max: 176.85180935319576
  episode_reward_mean: 38.425181932197134
  episode_reward_min: -189.84152090910777
  episodes_this_iter: 6
  episodes_total: 1036
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 6.938894007304805e-19
          cur_lr: 4.999999873689376e-05
          entropy: 1.163847804069519
          entropy_coeff: 0.0
          kl: 0.0004582736873999238
          model: {}
          policy_loss: -0.0053939735516905785
          total_loss: 6.890970706939697
          vf_explained_var

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,67,1177.14,268000,38.4252,176.852,-189.842,619.33


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,67,1177.14,268000,38.4252,176.852,-189.842,619.33


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,67,1177.14,268000,38.4252,176.852,-189.842,619.33


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 272000
  counters:
    num_agent_steps_sampled: 272000
    num_agent_steps_trained: 272000
    num_env_steps_sampled: 272000
    num_env_steps_trained: 272000
  custom_metrics: {}
  date: 2022-07-23_21-29-20
  done: false
  episode_len_mean: 627.9
  episode_media: {}
  episode_reward_max: 176.85180935319576
  episode_reward_mean: 40.89156956774719
  episode_reward_min: -189.84152090910777
  episodes_this_iter: 8
  episodes_total: 1044
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 3.4694470036524025e-19
          cur_lr: 4.999999873689376e-05
          entropy: 1.1516152620315552
          entropy_coeff: 0.0
          kl: 0.0011389617575332522
          model: {}
          policy_loss: -0.0006876919651404023
          total_loss: 7.421054840087891
          vf_explained_var

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,68,1193.77,272000,40.8916,176.852,-189.842,627.9


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,68,1193.77,272000,40.8916,176.852,-189.842,627.9


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,68,1193.77,272000,40.8916,176.852,-189.842,627.9


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 276000
  counters:
    num_agent_steps_sampled: 276000
    num_agent_steps_trained: 276000
    num_env_steps_sampled: 276000
    num_env_steps_trained: 276000
  custom_metrics: {}
  date: 2022-07-23_21-29-37
  done: false
  episode_len_mean: 624.99
  episode_media: {}
  episode_reward_max: 176.85180935319576
  episode_reward_mean: 44.42284674732417
  episode_reward_min: -131.49432851671315
  episodes_this_iter: 6
  episodes_total: 1050
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.7347235018262012e-19
          cur_lr: 4.999999873689376e-05
          entropy: 1.0741145610809326
          entropy_coeff: 0.0
          kl: 0.0005753572331741452
          model: {}
          policy_loss: -0.0011827342677861452
          total_loss: 6.432559490203857
          vf_explained_va

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,69,1210.93,276000,44.4228,176.852,-131.494,624.99


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,69,1210.93,276000,44.4228,176.852,-131.494,624.99


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,69,1210.93,276000,44.4228,176.852,-131.494,624.99


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 280000
  counters:
    num_agent_steps_sampled: 280000
    num_agent_steps_trained: 280000
    num_env_steps_sampled: 280000
    num_env_steps_trained: 280000
  custom_metrics: {}
  date: 2022-07-23_21-29-54
  done: false
  episode_len_mean: 589.25
  episode_media: {}
  episode_reward_max: 176.85180935319576
  episode_reward_mean: 46.503634955374935
  episode_reward_min: -69.65962879302738
  episodes_this_iter: 12
  episodes_total: 1062
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 8.673617509131006e-20
          cur_lr: 4.999999873689376e-05
          entropy: 1.066514492034912
          entropy_coeff: 0.0
          kl: 0.0011484401766210794
          model: {}
          policy_loss: -0.002405378269031644
          total_loss: 7.079168796539307
          vf_explained_var:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,70,1227.48,280000,46.5036,176.852,-69.6596,589.25


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,70,1227.48,280000,46.5036,176.852,-69.6596,589.25


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,70,1227.48,280000,46.5036,176.852,-69.6596,589.25


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 284000
  counters:
    num_agent_steps_sampled: 284000
    num_agent_steps_trained: 284000
    num_env_steps_sampled: 284000
    num_env_steps_trained: 284000
  custom_metrics: {}
  date: 2022-07-23_21-30-11
  done: false
  episode_len_mean: 571.38
  episode_media: {}
  episode_reward_max: 176.85180935319576
  episode_reward_mean: 48.24978277567207
  episode_reward_min: -69.65962879302738
  episodes_this_iter: 7
  episodes_total: 1069
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 4.336808754565503e-20
          cur_lr: 4.999999873689376e-05
          entropy: 1.1068178415298462
          entropy_coeff: 0.0
          kl: 0.008587869815528393
          model: {}
          policy_loss: 0.004278250969946384
          total_loss: 6.859447479248047
          vf_explained_var: 0.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,71,1244.72,284000,48.2498,176.852,-69.6596,571.38


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,71,1244.72,284000,48.2498,176.852,-69.6596,571.38


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,71,1244.72,284000,48.2498,176.852,-69.6596,571.38


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 288000
  counters:
    num_agent_steps_sampled: 288000
    num_agent_steps_trained: 288000
    num_env_steps_sampled: 288000
    num_env_steps_trained: 288000
  custom_metrics: {}
  date: 2022-07-23_21-30-29
  done: false
  episode_len_mean: 585.98
  episode_media: {}
  episode_reward_max: 176.85180935319576
  episode_reward_mean: 50.92036287684288
  episode_reward_min: -69.65962879302738
  episodes_this_iter: 5
  episodes_total: 1074
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 4.336808754565503e-20
          cur_lr: 4.999999873689376e-05
          entropy: 1.0903455018997192
          entropy_coeff: 0.0
          kl: 0.00032048937282525003
          model: {}
          policy_loss: -0.0003127303789369762
          total_loss: 6.522971153259277
          vf_explained_var

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,72,1262.21,288000,50.9204,176.852,-69.6596,585.98


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,72,1262.21,288000,50.9204,176.852,-69.6596,585.98


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,72,1262.21,288000,50.9204,176.852,-69.6596,585.98


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 292000
  counters:
    num_agent_steps_sampled: 292000
    num_agent_steps_trained: 292000
    num_env_steps_sampled: 292000
    num_env_steps_trained: 292000
  custom_metrics: {}
  date: 2022-07-23_21-30-46
  done: false
  episode_len_mean: 582.29
  episode_media: {}
  episode_reward_max: 218.33381471208168
  episode_reward_mean: 53.91421455332616
  episode_reward_min: -164.8475576565493
  episodes_this_iter: 7
  episodes_total: 1081
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.1684043772827515e-20
          cur_lr: 4.999999873689376e-05
          entropy: 1.1361994743347168
          entropy_coeff: 0.0
          kl: 0.0016854888526722789
          model: {}
          policy_loss: -0.0026581590063869953
          total_loss: 8.262979507446289
          vf_explained_var

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,73,1279.86,292000,53.9142,218.334,-164.848,582.29


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,73,1279.86,292000,53.9142,218.334,-164.848,582.29


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,73,1279.86,292000,53.9142,218.334,-164.848,582.29


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 296000
  counters:
    num_agent_steps_sampled: 296000
    num_agent_steps_trained: 296000
    num_env_steps_sampled: 296000
    num_env_steps_trained: 296000
  custom_metrics: {}
  date: 2022-07-23_21-31-03
  done: false
  episode_len_mean: 554.64
  episode_media: {}
  episode_reward_max: 228.71741673770535
  episode_reward_mean: 50.99592293737302
  episode_reward_min: -164.8475576565493
  episodes_this_iter: 9
  episodes_total: 1090
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.0842021886413758e-20
          cur_lr: 4.999999873689376e-05
          entropy: 1.0532333850860596
          entropy_coeff: 0.0
          kl: 0.0015362569829449058
          model: {}
          policy_loss: -0.0053064157254993916
          total_loss: 7.970619201660156
          vf_explained_var

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,74,1296.55,296000,50.9959,228.717,-164.848,554.64


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,74,1296.55,296000,50.9959,228.717,-164.848,554.64


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,74,1296.55,296000,50.9959,228.717,-164.848,554.64


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 300000
  counters:
    num_agent_steps_sampled: 300000
    num_agent_steps_trained: 300000
    num_env_steps_sampled: 300000
    num_env_steps_trained: 300000
  custom_metrics: {}
  date: 2022-07-23_21-31-21
  done: false
  episode_len_mean: 546.35
  episode_media: {}
  episode_reward_max: 228.71741673770535
  episode_reward_mean: 52.08909729407229
  episode_reward_min: -164.8475576565493
  episodes_this_iter: 7
  episodes_total: 1097
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 5.421010943206879e-21
          cur_lr: 4.999999873689376e-05
          entropy: 1.1431210041046143
          entropy_coeff: 0.0
          kl: 0.0013524306705221534
          model: {}
          policy_loss: -0.0014309784164652228
          total_loss: 7.022461891174316
          vf_explained_var:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,75,1314.32,300000,52.0891,228.717,-164.848,546.35


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,75,1314.32,300000,52.0891,228.717,-164.848,546.35


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,75,1314.32,300000,52.0891,228.717,-164.848,546.35


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 304000
  counters:
    num_agent_steps_sampled: 304000
    num_agent_steps_trained: 304000
    num_env_steps_sampled: 304000
    num_env_steps_trained: 304000
  custom_metrics: {}
  date: 2022-07-23_21-31-40
  done: false
  episode_len_mean: 538.28
  episode_media: {}
  episode_reward_max: 228.71741673770535
  episode_reward_mean: 53.10359846491338
  episode_reward_min: -164.8475576565493
  episodes_this_iter: 5
  episodes_total: 1102
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.7105054716034394e-21
          cur_lr: 4.999999873689376e-05
          entropy: 1.1848446130752563
          entropy_coeff: 0.0
          kl: 0.0011136605171486735
          model: {}
          policy_loss: -0.003960353787988424
          total_loss: 6.7986931800842285
          vf_explained_var

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,76,1332.94,304000,53.1036,228.717,-164.848,538.28


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,76,1332.94,304000,53.1036,228.717,-164.848,538.28


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,76,1332.94,304000,53.1036,228.717,-164.848,538.28


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 308000
  counters:
    num_agent_steps_sampled: 308000
    num_agent_steps_trained: 308000
    num_env_steps_sampled: 308000
    num_env_steps_trained: 308000
  custom_metrics: {}
  date: 2022-07-23_21-31-57
  done: false
  episode_len_mean: 523.68
  episode_media: {}
  episode_reward_max: 228.71741673770535
  episode_reward_mean: 53.36457298417677
  episode_reward_min: -164.8475576565493
  episodes_this_iter: 7
  episodes_total: 1109
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.3552527358017197e-21
          cur_lr: 4.999999873689376e-05
          entropy: 1.1542396545410156
          entropy_coeff: 0.0
          kl: 0.0014379865024238825
          model: {}
          policy_loss: -0.004039347637444735
          total_loss: 7.656665802001953
          vf_explained_var:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,77,1349.93,308000,53.3646,228.717,-164.848,523.68


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,77,1349.93,308000,53.3646,228.717,-164.848,523.68


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,77,1349.93,308000,53.3646,228.717,-164.848,523.68


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 312000
  counters:
    num_agent_steps_sampled: 312000
    num_agent_steps_trained: 312000
    num_env_steps_sampled: 312000
    num_env_steps_trained: 312000
  custom_metrics: {}
  date: 2022-07-23_21-32-14
  done: false
  episode_len_mean: 548.96
  episode_media: {}
  episode_reward_max: 228.71741673770535
  episode_reward_mean: 57.03864054391047
  episode_reward_min: -164.8475576565493
  episodes_this_iter: 4
  episodes_total: 1113
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 6.776263679008599e-22
          cur_lr: 4.999999873689376e-05
          entropy: 1.1859469413757324
          entropy_coeff: 0.0
          kl: 0.0015436841640621424
          model: {}
          policy_loss: -0.004840174224227667
          total_loss: 7.232687473297119
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,78,1367.5,312000,57.0386,228.717,-164.848,548.96


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,78,1367.5,312000,57.0386,228.717,-164.848,548.96


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,78,1367.5,312000,57.0386,228.717,-164.848,548.96


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 316000
  counters:
    num_agent_steps_sampled: 316000
    num_agent_steps_trained: 316000
    num_env_steps_sampled: 316000
    num_env_steps_trained: 316000
  custom_metrics: {}
  date: 2022-07-23_21-32-31
  done: false
  episode_len_mean: 567.81
  episode_media: {}
  episode_reward_max: 228.71741673770535
  episode_reward_mean: 59.81595672199126
  episode_reward_min: -164.8475576565493
  episodes_this_iter: 7
  episodes_total: 1120
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 3.3881318395042993e-22
          cur_lr: 4.999999873689376e-05
          entropy: 1.1503088474273682
          entropy_coeff: 0.0
          kl: 0.0009478806750848889
          model: {}
          policy_loss: -0.003591818269342184
          total_loss: 6.4561052322387695
          vf_explained_var

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,79,1383.83,316000,59.816,228.717,-164.848,567.81


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,79,1383.83,316000,59.816,228.717,-164.848,567.81


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,79,1383.83,316000,59.816,228.717,-164.848,567.81


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 320000
  counters:
    num_agent_steps_sampled: 320000
    num_agent_steps_trained: 320000
    num_env_steps_sampled: 320000
    num_env_steps_trained: 320000
  custom_metrics: {}
  date: 2022-07-23_21-32-48
  done: false
  episode_len_mean: 585.58
  episode_media: {}
  episode_reward_max: 228.71741673770535
  episode_reward_mean: 60.90477311215399
  episode_reward_min: -164.8475576565493
  episodes_this_iter: 7
  episodes_total: 1127
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.6940659197521496e-22
          cur_lr: 4.999999873689376e-05
          entropy: 1.1423418521881104
          entropy_coeff: 0.0
          kl: 0.001659323344938457
          model: {}
          policy_loss: -0.0054055266082286835
          total_loss: 6.862620830535889
          vf_explained_var:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,80,1400.71,320000,60.9048,228.717,-164.848,585.58


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,80,1400.71,320000,60.9048,228.717,-164.848,585.58


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,80,1400.71,320000,60.9048,228.717,-164.848,585.58


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 324000
  counters:
    num_agent_steps_sampled: 324000
    num_agent_steps_trained: 324000
    num_env_steps_sampled: 324000
    num_env_steps_trained: 324000
  custom_metrics: {}
  date: 2022-07-23_21-33-06
  done: false
  episode_len_mean: 598.01
  episode_media: {}
  episode_reward_max: 228.71741673770535
  episode_reward_mean: 62.10675932110458
  episode_reward_min: -164.8475576565493
  episodes_this_iter: 4
  episodes_total: 1131
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 8.470329598760748e-23
          cur_lr: 4.999999873689376e-05
          entropy: 1.186524510383606
          entropy_coeff: 0.0
          kl: 0.0014151664217934012
          model: {}
          policy_loss: -0.004403545055538416
          total_loss: 6.648592948913574
          vf_explained_var: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,81,1418.51,324000,62.1068,228.717,-164.848,598.01


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,81,1418.51,324000,62.1068,228.717,-164.848,598.01


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,81,1418.51,324000,62.1068,228.717,-164.848,598.01


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 328000
  counters:
    num_agent_steps_sampled: 328000
    num_agent_steps_trained: 328000
    num_env_steps_sampled: 328000
    num_env_steps_trained: 328000
  custom_metrics: {}
  date: 2022-07-23_21-33-23
  done: false
  episode_len_mean: 614.88
  episode_media: {}
  episode_reward_max: 228.71741673770535
  episode_reward_mean: 62.693764343154584
  episode_reward_min: -164.8475576565493
  episodes_this_iter: 4
  episodes_total: 1135
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 4.235164799380374e-23
          cur_lr: 4.999999873689376e-05
          entropy: 1.060834527015686
          entropy_coeff: 0.0
          kl: 0.0019724511075764894
          model: {}
          policy_loss: -0.005095188040286303
          total_loss: 6.878475189208984
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,82,1435.82,328000,62.6938,228.717,-164.848,614.88


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,82,1435.82,328000,62.6938,228.717,-164.848,614.88


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,82,1435.82,328000,62.6938,228.717,-164.848,614.88


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 332000
  counters:
    num_agent_steps_sampled: 332000
    num_agent_steps_trained: 332000
    num_env_steps_sampled: 332000
    num_env_steps_trained: 332000
  custom_metrics: {}
  date: 2022-07-23_21-33-40
  done: false
  episode_len_mean: 615.64
  episode_media: {}
  episode_reward_max: 228.71741673770535
  episode_reward_mean: 60.91806852779711
  episode_reward_min: -164.8475576565493
  episodes_this_iter: 6
  episodes_total: 1141
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.117582399690187e-23
          cur_lr: 4.999999873689376e-05
          entropy: 1.097103476524353
          entropy_coeff: 0.0
          kl: 0.0015469222562387586
          model: {}
          policy_loss: -0.007842183113098145
          total_loss: 7.177569389343262
          vf_explained_var: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,83,1452.88,332000,60.9181,228.717,-164.848,615.64


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,83,1452.88,332000,60.9181,228.717,-164.848,615.64


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,83,1452.88,332000,60.9181,228.717,-164.848,615.64


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 336000
  counters:
    num_agent_steps_sampled: 336000
    num_agent_steps_trained: 336000
    num_env_steps_sampled: 336000
    num_env_steps_trained: 336000
  custom_metrics: {}
  date: 2022-07-23_21-33-58
  done: false
  episode_len_mean: 625.19
  episode_media: {}
  episode_reward_max: 228.71741673770535
  episode_reward_mean: 61.70839295634633
  episode_reward_min: -164.8475576565493
  episodes_this_iter: 4
  episodes_total: 1145
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.0587911998450935e-23
          cur_lr: 4.999999873689376e-05
          entropy: 1.1230194568634033
          entropy_coeff: 0.0
          kl: 0.004508230835199356
          model: {}
          policy_loss: -0.006248850841075182
          total_loss: 5.925057888031006
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,84,1470.88,336000,61.7084,228.717,-164.848,625.19


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,84,1470.88,336000,61.7084,228.717,-164.848,625.19


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,84,1470.88,336000,61.7084,228.717,-164.848,625.19


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 340000
  counters:
    num_agent_steps_sampled: 340000
    num_agent_steps_trained: 340000
    num_env_steps_sampled: 340000
    num_env_steps_trained: 340000
  custom_metrics: {}
  date: 2022-07-23_21-34-16
  done: false
  episode_len_mean: 628.49
  episode_media: {}
  episode_reward_max: 228.71741673770535
  episode_reward_mean: 58.63627306775221
  episode_reward_min: -164.8475576565493
  episodes_this_iter: 9
  episodes_total: 1154
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 5.293955999225468e-24
          cur_lr: 4.999999873689376e-05
          entropy: 0.9956445097923279
          entropy_coeff: 0.0
          kl: 0.0020844426471740007
          model: {}
          policy_loss: -0.015909388661384583
          total_loss: 6.982019901275635
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,85,1488.26,340000,58.6363,228.717,-164.848,628.49


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,85,1488.26,340000,58.6363,228.717,-164.848,628.49


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,85,1488.26,340000,58.6363,228.717,-164.848,628.49


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 344000
  counters:
    num_agent_steps_sampled: 344000
    num_agent_steps_trained: 344000
    num_env_steps_sampled: 344000
    num_env_steps_trained: 344000
  custom_metrics: {}
  date: 2022-07-23_21-34-33
  done: false
  episode_len_mean: 646.59
  episode_media: {}
  episode_reward_max: 228.71741673770535
  episode_reward_mean: 58.750204296826354
  episode_reward_min: -164.8475576565493
  episodes_this_iter: 5
  episodes_total: 1159
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.646977999612734e-24
          cur_lr: 4.999999873689376e-05
          entropy: 1.0990214347839355
          entropy_coeff: 0.0
          kl: 0.0007549879956059158
          model: {}
          policy_loss: -0.0015222367364913225
          total_loss: 5.805439472198486
          vf_explained_var

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,86,1505.25,344000,58.7502,228.717,-164.848,646.59


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,86,1505.25,344000,58.7502,228.717,-164.848,646.59


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,86,1505.25,344000,58.7502,228.717,-164.848,646.59


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 348000
  counters:
    num_agent_steps_sampled: 348000
    num_agent_steps_trained: 348000
    num_env_steps_sampled: 348000
    num_env_steps_trained: 348000
  custom_metrics: {}
  date: 2022-07-23_21-34-50
  done: false
  episode_len_mean: 661.72
  episode_media: {}
  episode_reward_max: 228.71741673770535
  episode_reward_mean: 63.663886744612654
  episode_reward_min: -164.8475576565493
  episodes_this_iter: 4
  episodes_total: 1163
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.323488999806367e-24
          cur_lr: 4.999999873689376e-05
          entropy: 1.0927181243896484
          entropy_coeff: 0.0
          kl: 0.0026572293136268854
          model: {}
          policy_loss: -0.0025172659661620855
          total_loss: 6.64991569519043
          vf_explained_var:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,87,1522.85,348000,63.6639,228.717,-164.848,661.72


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,87,1522.85,348000,63.6639,228.717,-164.848,661.72


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,87,1522.85,348000,63.6639,228.717,-164.848,661.72


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 352000
  counters:
    num_agent_steps_sampled: 352000
    num_agent_steps_trained: 352000
    num_env_steps_sampled: 352000
    num_env_steps_trained: 352000
  custom_metrics: {}
  date: 2022-07-23_21-35-08
  done: false
  episode_len_mean: 679.16
  episode_media: {}
  episode_reward_max: 228.71741673770535
  episode_reward_mean: 66.51128446201204
  episode_reward_min: -164.8475576565493
  episodes_this_iter: 4
  episodes_total: 1167
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 6.617444999031835e-25
          cur_lr: 4.999999873689376e-05
          entropy: 1.1103160381317139
          entropy_coeff: 0.0
          kl: 0.00033676420571282506
          model: {}
          policy_loss: -0.010989013127982616
          total_loss: 5.122103691101074
          vf_explained_var:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,88,1540.7,352000,66.5113,228.717,-164.848,679.16


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,88,1540.7,352000,66.5113,228.717,-164.848,679.16


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,88,1540.7,352000,66.5113,228.717,-164.848,679.16


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 356000
  counters:
    num_agent_steps_sampled: 356000
    num_agent_steps_trained: 356000
    num_env_steps_sampled: 356000
    num_env_steps_trained: 356000
  custom_metrics: {}
  date: 2022-07-23_21-35-26
  done: false
  episode_len_mean: 687.57
  episode_media: {}
  episode_reward_max: 228.71741673770535
  episode_reward_mean: 68.69170218016414
  episode_reward_min: -164.8475576565493
  episodes_this_iter: 4
  episodes_total: 1171
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 3.3087224995159173e-25
          cur_lr: 4.999999873689376e-05
          entropy: 1.1127840280532837
          entropy_coeff: 0.0
          kl: 0.002950628288090229
          model: {}
          policy_loss: -0.00796597357839346
          total_loss: 4.733006954193115
          vf_explained_var: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,89,1558.4,356000,68.6917,228.717,-164.848,687.57


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,89,1558.4,356000,68.6917,228.717,-164.848,687.57


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,89,1558.4,356000,68.6917,228.717,-164.848,687.57


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 360000
  counters:
    num_agent_steps_sampled: 360000
    num_agent_steps_trained: 360000
    num_env_steps_sampled: 360000
    num_env_steps_trained: 360000
  custom_metrics: {}
  date: 2022-07-23_21-35-44
  done: false
  episode_len_mean: 695.66
  episode_media: {}
  episode_reward_max: 228.71741673770535
  episode_reward_mean: 68.9869144704711
  episode_reward_min: -164.8475576565493
  episodes_this_iter: 6
  episodes_total: 1177
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.6543612497579586e-25
          cur_lr: 4.999999873689376e-05
          entropy: 1.1122232675552368
          entropy_coeff: 0.0
          kl: 0.0049406420439481735
          model: {}
          policy_loss: -0.005979544948786497
          total_loss: 5.5485076904296875
          vf_explained_var:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,90,1576.22,360000,68.9869,228.717,-164.848,695.66


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,90,1576.22,360000,68.9869,228.717,-164.848,695.66


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,90,1576.22,360000,68.9869,228.717,-164.848,695.66


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 364000
  counters:
    num_agent_steps_sampled: 364000
    num_agent_steps_trained: 364000
    num_env_steps_sampled: 364000
    num_env_steps_trained: 364000
  custom_metrics: {}
  date: 2022-07-23_21-36-01
  done: false
  episode_len_mean: 718.21
  episode_media: {}
  episode_reward_max: 228.71741673770535
  episode_reward_mean: 72.37743884021859
  episode_reward_min: -159.30991697534444
  episodes_this_iter: 5
  episodes_total: 1182
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 8.271806248789793e-26
          cur_lr: 4.999999873689376e-05
          entropy: 1.0413179397583008
          entropy_coeff: 0.0
          kl: 0.004595835693180561
          model: {}
          policy_loss: -0.0064033037051558495
          total_loss: 5.612113952636719
          vf_explained_var:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,91,1593.44,364000,72.3774,228.717,-159.31,718.21


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,91,1593.44,364000,72.3774,228.717,-159.31,718.21


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,91,1593.44,364000,72.3774,228.717,-159.31,718.21


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 368000
  counters:
    num_agent_steps_sampled: 368000
    num_agent_steps_trained: 368000
    num_env_steps_sampled: 368000
    num_env_steps_trained: 368000
  custom_metrics: {}
  date: 2022-07-23_21-36-19
  done: false
  episode_len_mean: 739.48
  episode_media: {}
  episode_reward_max: 170.35000904358859
  episode_reward_mean: 74.27999331020402
  episode_reward_min: -103.9310456997668
  episodes_this_iter: 5
  episodes_total: 1187
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 4.1359031243948966e-26
          cur_lr: 4.999999873689376e-05
          entropy: 1.1041587591171265
          entropy_coeff: 0.0
          kl: 0.0022196073550730944
          model: {}
          policy_loss: -0.005000351462513208
          total_loss: 7.215473651885986
          vf_explained_var:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,92,1610.82,368000,74.28,170.35,-103.931,739.48


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,92,1610.82,368000,74.28,170.35,-103.931,739.48


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,92,1610.82,368000,74.28,170.35,-103.931,739.48


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 372000
  counters:
    num_agent_steps_sampled: 372000
    num_agent_steps_trained: 372000
    num_env_steps_sampled: 372000
    num_env_steps_trained: 372000
  custom_metrics: {}
  date: 2022-07-23_21-36-36
  done: false
  episode_len_mean: 747.56
  episode_media: {}
  episode_reward_max: 170.35000904358859
  episode_reward_mean: 76.61403896521323
  episode_reward_min: -103.9310456997668
  episodes_this_iter: 4
  episodes_total: 1191
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.0679515621974483e-26
          cur_lr: 4.999999873689376e-05
          entropy: 0.9229457974433899
          entropy_coeff: 0.0
          kl: 0.0012380443513393402
          model: {}
          policy_loss: -0.005781013518571854
          total_loss: 4.55399227142334
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,93,1628.69,372000,76.614,170.35,-103.931,747.56


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,93,1628.69,372000,76.614,170.35,-103.931,747.56


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,93,1628.69,372000,76.614,170.35,-103.931,747.56


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 376000
  counters:
    num_agent_steps_sampled: 376000
    num_agent_steps_trained: 376000
    num_env_steps_sampled: 376000
    num_env_steps_trained: 376000
  custom_metrics: {}
  date: 2022-07-23_21-36-53
  done: false
  episode_len_mean: 758.45
  episode_media: {}
  episode_reward_max: 213.01129964626418
  episode_reward_mean: 78.94202063223555
  episode_reward_min: -103.9310456997668
  episodes_this_iter: 5
  episodes_total: 1196
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.0339757810987241e-26
          cur_lr: 4.999999873689376e-05
          entropy: 1.005212426185608
          entropy_coeff: 0.0
          kl: 0.003980665467679501
          model: {}
          policy_loss: -0.007884349673986435
          total_loss: 5.885071277618408
          vf_explained_var: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,94,1645.66,376000,78.942,213.011,-103.931,758.45


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,94,1645.66,376000,78.942,213.011,-103.931,758.45


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,94,1645.66,376000,78.942,213.011,-103.931,758.45


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 380000
  counters:
    num_agent_steps_sampled: 380000
    num_agent_steps_trained: 380000
    num_env_steps_sampled: 380000
    num_env_steps_trained: 380000
  custom_metrics: {}
  date: 2022-07-23_21-37-10
  done: false
  episode_len_mean: 766.18
  episode_media: {}
  episode_reward_max: 213.01129964626418
  episode_reward_mean: 80.2431549932571
  episode_reward_min: -103.9310456997668
  episodes_this_iter: 4
  episodes_total: 1200
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 5.169878905493621e-27
          cur_lr: 4.999999873689376e-05
          entropy: 1.0256942510604858
          entropy_coeff: 0.0
          kl: 0.0010436364682391286
          model: {}
          policy_loss: -0.0062141818925738335
          total_loss: 4.31348991394043
          vf_explained_var: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,95,1662.56,380000,80.2432,213.011,-103.931,766.18


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,95,1662.56,380000,80.2432,213.011,-103.931,766.18


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,95,1662.56,380000,80.2432,213.011,-103.931,766.18


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 384000
  counters:
    num_agent_steps_sampled: 384000
    num_agent_steps_trained: 384000
    num_env_steps_sampled: 384000
    num_env_steps_trained: 384000
  custom_metrics: {}
  date: 2022-07-23_21-37-28
  done: false
  episode_len_mean: 780.15
  episode_media: {}
  episode_reward_max: 213.01129964626418
  episode_reward_mean: 82.83012411607697
  episode_reward_min: -103.9310456997668
  episodes_this_iter: 5
  episodes_total: 1205
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.5849394527468104e-27
          cur_lr: 4.999999873689376e-05
          entropy: 1.077913761138916
          entropy_coeff: 0.0
          kl: 0.0019003220368176699
          model: {}
          policy_loss: -3.696193016367033e-05
          total_loss: 4.37673807144165
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,96,1680.05,384000,82.8301,213.011,-103.931,780.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,96,1680.05,384000,82.8301,213.011,-103.931,780.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,96,1680.05,384000,82.8301,213.011,-103.931,780.15


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 388000
  counters:
    num_agent_steps_sampled: 388000
    num_agent_steps_trained: 388000
    num_env_steps_sampled: 388000
    num_env_steps_trained: 388000
  custom_metrics: {}
  date: 2022-07-23_21-37-45
  done: false
  episode_len_mean: 784.09
  episode_media: {}
  episode_reward_max: 213.01129964626418
  episode_reward_mean: 83.76922676738252
  episode_reward_min: -103.9310456997668
  episodes_this_iter: 5
  episodes_total: 1210
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.2924697263734052e-27
          cur_lr: 4.999999873689376e-05
          entropy: 1.0626260042190552
          entropy_coeff: 0.0
          kl: 0.002567252144217491
          model: {}
          policy_loss: -0.002777994843199849
          total_loss: 5.376783847808838
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,97,1697.29,388000,83.7692,213.011,-103.931,784.09


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,97,1697.29,388000,83.7692,213.011,-103.931,784.09


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,97,1697.29,388000,83.7692,213.011,-103.931,784.09


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 392000
  counters:
    num_agent_steps_sampled: 392000
    num_agent_steps_trained: 392000
    num_env_steps_sampled: 392000
    num_env_steps_trained: 392000
  custom_metrics: {}
  date: 2022-07-23_21-38-03
  done: false
  episode_len_mean: 785.21
  episode_media: {}
  episode_reward_max: 213.01129964626418
  episode_reward_mean: 83.30732969417141
  episode_reward_min: -103.9310456997668
  episodes_this_iter: 5
  episodes_total: 1215
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 6.462348631867026e-28
          cur_lr: 4.999999873689376e-05
          entropy: 1.0747861862182617
          entropy_coeff: 0.0
          kl: 0.0020751510746777058
          model: {}
          policy_loss: -0.007684758864343166
          total_loss: 5.955342769622803
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,98,1715.14,392000,83.3073,213.011,-103.931,785.21


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,98,1715.14,392000,83.3073,213.011,-103.931,785.21


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,98,1715.14,392000,83.3073,213.011,-103.931,785.21


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 396000
  counters:
    num_agent_steps_sampled: 396000
    num_agent_steps_trained: 396000
    num_env_steps_sampled: 396000
    num_env_steps_trained: 396000
  custom_metrics: {}
  date: 2022-07-23_21-38-21
  done: false
  episode_len_mean: 802.84
  episode_media: {}
  episode_reward_max: 213.01129964626418
  episode_reward_mean: 83.48953444338159
  episode_reward_min: -140.86343634936821
  episodes_this_iter: 6
  episodes_total: 1221
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 3.231174315933513e-28
          cur_lr: 4.999999873689376e-05
          entropy: 1.0826202630996704
          entropy_coeff: 0.0
          kl: 0.0013111868174746633
          model: {}
          policy_loss: -0.0032214056700468063
          total_loss: 5.781482219696045
          vf_explained_var

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,99,1733.19,396000,83.4895,213.011,-140.863,802.84


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,99,1733.19,396000,83.4895,213.011,-140.863,802.84


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,99,1733.19,396000,83.4895,213.011,-140.863,802.84


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 400000
  counters:
    num_agent_steps_sampled: 400000
    num_agent_steps_trained: 400000
    num_env_steps_sampled: 400000
    num_env_steps_trained: 400000
  custom_metrics: {}
  date: 2022-07-23_21-38-39
  done: false
  episode_len_mean: 826.8
  episode_media: {}
  episode_reward_max: 213.01129964626418
  episode_reward_mean: 85.66951709867226
  episode_reward_min: -140.86343634936821
  episodes_this_iter: 4
  episodes_total: 1225
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.6155871579667565e-28
          cur_lr: 4.999999873689376e-05
          entropy: 0.8253870010375977
          entropy_coeff: 0.0
          kl: 0.0026073807384818792
          model: {}
          policy_loss: 0.0010225380538031459
          total_loss: 5.9413275718688965
          vf_explained_var

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,100,1750.9,400000,85.6695,213.011,-140.863,826.8


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,100,1750.9,400000,85.6695,213.011,-140.863,826.8


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,100,1750.9,400000,85.6695,213.011,-140.863,826.8


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 404000
  counters:
    num_agent_steps_sampled: 404000
    num_agent_steps_trained: 404000
    num_env_steps_sampled: 404000
    num_env_steps_trained: 404000
  custom_metrics: {}
  date: 2022-07-23_21-38-56
  done: false
  episode_len_mean: 795.96
  episode_media: {}
  episode_reward_max: 213.01129964626418
  episode_reward_mean: 81.24827125560837
  episode_reward_min: -140.86343634936821
  episodes_this_iter: 6
  episodes_total: 1231
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 8.077935789833782e-29
          cur_lr: 4.999999873689376e-05
          entropy: 0.9636637568473816
          entropy_coeff: 0.0
          kl: 0.0007257151301018894
          model: {}
          policy_loss: -0.0028868112713098526
          total_loss: 5.834120750427246
          vf_explained_var

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,101,1768.15,404000,81.2483,213.011,-140.863,795.96


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,101,1768.15,404000,81.2483,213.011,-140.863,795.96


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,101,1768.15,404000,81.2483,213.011,-140.863,795.96


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 408000
  counters:
    num_agent_steps_sampled: 408000
    num_agent_steps_trained: 408000
    num_env_steps_sampled: 408000
    num_env_steps_trained: 408000
  custom_metrics: {}
  date: 2022-07-23_21-39-14
  done: false
  episode_len_mean: 788.71
  episode_media: {}
  episode_reward_max: 213.01129964626418
  episode_reward_mean: 81.72959133298072
  episode_reward_min: -140.86343634936821
  episodes_this_iter: 5
  episodes_total: 1236
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 4.038967894916891e-29
          cur_lr: 4.999999873689376e-05
          entropy: 0.9796260595321655
          entropy_coeff: 0.0
          kl: 0.0008656998979859054
          model: {}
          policy_loss: -0.005343559663742781
          total_loss: 6.032333850860596
          vf_explained_var:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,102,1786.04,408000,81.7296,213.011,-140.863,788.71


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,102,1786.04,408000,81.7296,213.011,-140.863,788.71


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,102,1786.04,408000,81.7296,213.011,-140.863,788.71


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 412000
  counters:
    num_agent_steps_sampled: 412000
    num_agent_steps_trained: 412000
    num_env_steps_sampled: 412000
    num_env_steps_trained: 412000
  custom_metrics: {}
  date: 2022-07-23_21-39-31
  done: false
  episode_len_mean: 791.97
  episode_media: {}
  episode_reward_max: 213.01129964626418
  episode_reward_mean: 85.42272812332676
  episode_reward_min: -140.86343634936821
  episodes_this_iter: 5
  episodes_total: 1241
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.0194839474584456e-29
          cur_lr: 4.999999873689376e-05
          entropy: 0.9881000518798828
          entropy_coeff: 0.0
          kl: 0.001164299319498241
          model: {}
          policy_loss: -0.005056031979620457
          total_loss: 6.558657646179199
          vf_explained_var:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,103,1802.93,412000,85.4227,213.011,-140.863,791.97


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,103,1802.93,412000,85.4227,213.011,-140.863,791.97


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,103,1802.93,412000,85.4227,213.011,-140.863,791.97


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 416000
  counters:
    num_agent_steps_sampled: 416000
    num_agent_steps_trained: 416000
    num_env_steps_sampled: 416000
    num_env_steps_trained: 416000
  custom_metrics: {}
  date: 2022-07-23_21-39-50
  done: false
  episode_len_mean: 799.12
  episode_media: {}
  episode_reward_max: 213.01129964626418
  episode_reward_mean: 86.93501314022068
  episode_reward_min: -140.86343634936821
  episodes_this_iter: 4
  episodes_total: 1245
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.0097419737292228e-29
          cur_lr: 4.999999873689376e-05
          entropy: 0.9588848948478699
          entropy_coeff: 0.0
          kl: 0.0011265964712947607
          model: {}
          policy_loss: -0.00586838461458683
          total_loss: 5.986729621887207
          vf_explained_var:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,104,1821.9,416000,86.935,213.011,-140.863,799.12


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,104,1821.9,416000,86.935,213.011,-140.863,799.12


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,104,1821.9,416000,86.935,213.011,-140.863,799.12


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 420000
  counters:
    num_agent_steps_sampled: 420000
    num_agent_steps_trained: 420000
    num_env_steps_sampled: 420000
    num_env_steps_trained: 420000
  custom_metrics: {}
  date: 2022-07-23_21-40-09
  done: false
  episode_len_mean: 806.95
  episode_media: {}
  episode_reward_max: 213.01129964626418
  episode_reward_mean: 90.10913654564949
  episode_reward_min: -140.86343634936821
  episodes_this_iter: 4
  episodes_total: 1249
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 5.048709868646114e-30
          cur_lr: 4.999999873689376e-05
          entropy: 0.8855140805244446
          entropy_coeff: 0.0
          kl: 0.001980650471523404
          model: {}
          policy_loss: -0.004636387340724468
          total_loss: 6.003791809082031
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,105,1840.08,420000,90.1091,213.011,-140.863,806.95


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,105,1840.08,420000,90.1091,213.011,-140.863,806.95


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,105,1840.08,420000,90.1091,213.011,-140.863,806.95


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 424000
  counters:
    num_agent_steps_sampled: 424000
    num_agent_steps_trained: 424000
    num_env_steps_sampled: 424000
    num_env_steps_trained: 424000
  custom_metrics: {}
  date: 2022-07-23_21-40-27
  done: false
  episode_len_mean: 830.44
  episode_media: {}
  episode_reward_max: 213.01129964626418
  episode_reward_mean: 97.49219910211576
  episode_reward_min: -140.86343634936821
  episodes_this_iter: 7
  episodes_total: 1256
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.524354934323057e-30
          cur_lr: 4.999999873689376e-05
          entropy: 0.9302951097488403
          entropy_coeff: 0.0
          kl: 0.0040655904449522495
          model: {}
          policy_loss: 0.002172181149944663
          total_loss: 6.544187545776367
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,106,1858.41,424000,97.4922,213.011,-140.863,830.44


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,106,1858.41,424000,97.4922,213.011,-140.863,830.44


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,106,1858.41,424000,97.4922,213.011,-140.863,830.44


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 428000
  counters:
    num_agent_steps_sampled: 428000
    num_agent_steps_trained: 428000
    num_env_steps_sampled: 428000
    num_env_steps_trained: 428000
  custom_metrics: {}
  date: 2022-07-23_21-40-46
  done: false
  episode_len_mean: 821.29
  episode_media: {}
  episode_reward_max: 218.62919209036698
  episode_reward_mean: 97.3094909296323
  episode_reward_min: -140.86343634936821
  episodes_this_iter: 5
  episodes_total: 1261
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.2621774671615285e-30
          cur_lr: 4.999999873689376e-05
          entropy: 0.8739277720451355
          entropy_coeff: 0.0
          kl: 0.0016426483634859324
          model: {}
          policy_loss: -0.010105411522090435
          total_loss: 5.400640964508057
          vf_explained_var:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,107,1876.91,428000,97.3095,218.629,-140.863,821.29


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,107,1876.91,428000,97.3095,218.629,-140.863,821.29


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,107,1876.91,428000,97.3095,218.629,-140.863,821.29


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 432000
  counters:
    num_agent_steps_sampled: 432000
    num_agent_steps_trained: 432000
    num_env_steps_sampled: 432000
    num_env_steps_trained: 432000
  custom_metrics: {}
  date: 2022-07-23_21-41-04
  done: false
  episode_len_mean: 778.12
  episode_media: {}
  episode_reward_max: 218.62919209036698
  episode_reward_mean: 89.26593496006696
  episode_reward_min: -140.86343634936821
  episodes_this_iter: 9
  episodes_total: 1270
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 6.3108873358076425e-31
          cur_lr: 4.999999873689376e-05
          entropy: 0.8860469460487366
          entropy_coeff: 0.0
          kl: 0.0018809010507538915
          model: {}
          policy_loss: -0.0006100083701312542
          total_loss: 7.113195896148682
          vf_explained_va

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,108,1895.02,432000,89.2659,218.629,-140.863,778.12


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,108,1895.02,432000,89.2659,218.629,-140.863,778.12


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,108,1895.02,432000,89.2659,218.629,-140.863,778.12


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 436000
  counters:
    num_agent_steps_sampled: 436000
    num_agent_steps_trained: 436000
    num_env_steps_sampled: 436000
    num_env_steps_trained: 436000
  custom_metrics: {}
  date: 2022-07-23_21-41-21
  done: false
  episode_len_mean: 771.43
  episode_media: {}
  episode_reward_max: 228.95294799728737
  episode_reward_mean: 91.83617730865971
  episode_reward_min: -140.86343634936821
  episodes_this_iter: 6
  episodes_total: 1276
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 3.1554436679038213e-31
          cur_lr: 4.999999873689376e-05
          entropy: 0.8499624729156494
          entropy_coeff: 0.0
          kl: 0.0004883285728283226
          model: {}
          policy_loss: -0.0041829971596598625
          total_loss: 6.688403606414795
          vf_explained_va

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,109,1912.66,436000,91.8362,228.953,-140.863,771.43


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,109,1912.66,436000,91.8362,228.953,-140.863,771.43


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,109,1912.66,436000,91.8362,228.953,-140.863,771.43


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 440000
  counters:
    num_agent_steps_sampled: 440000
    num_agent_steps_trained: 440000
    num_env_steps_sampled: 440000
    num_env_steps_trained: 440000
  custom_metrics: {}
  date: 2022-07-23_21-41-39
  done: false
  episode_len_mean: 741.38
  episode_media: {}
  episode_reward_max: 239.92613802489666
  episode_reward_mean: 93.18317143776885
  episode_reward_min: -140.86343634936821
  episodes_this_iter: 9
  episodes_total: 1285
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.5777218339519106e-31
          cur_lr: 4.999999873689376e-05
          entropy: 0.834959089756012
          entropy_coeff: 0.0
          kl: 0.003476008540019393
          model: {}
          policy_loss: -0.00743790902197361
          total_loss: 7.767946720123291
          vf_explained_var: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,110,1930.43,440000,93.1832,239.926,-140.863,741.38


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,110,1930.43,440000,93.1832,239.926,-140.863,741.38


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,110,1930.43,440000,93.1832,239.926,-140.863,741.38


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 444000
  counters:
    num_agent_steps_sampled: 444000
    num_agent_steps_trained: 444000
    num_env_steps_sampled: 444000
    num_env_steps_trained: 444000
  custom_metrics: {}
  date: 2022-07-23_21-41-58
  done: false
  episode_len_mean: 741.22
  episode_media: {}
  episode_reward_max: 239.92613802489666
  episode_reward_mean: 94.94266555187374
  episode_reward_min: -140.86343634936821
  episodes_this_iter: 4
  episodes_total: 1289
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 7.888609169759553e-32
          cur_lr: 4.999999873689376e-05
          entropy: 0.858730673789978
          entropy_coeff: 0.0
          kl: 0.007149260025471449
          model: {}
          policy_loss: 0.0009655532194301486
          total_loss: 7.5393218994140625
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,111,1948.92,444000,94.9427,239.926,-140.863,741.22


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,111,1948.92,444000,94.9427,239.926,-140.863,741.22


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,111,1948.92,444000,94.9427,239.926,-140.863,741.22


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 448000
  counters:
    num_agent_steps_sampled: 448000
    num_agent_steps_trained: 448000
    num_env_steps_sampled: 448000
    num_env_steps_trained: 448000
  custom_metrics: {}
  date: 2022-07-23_21-42-16
  done: false
  episode_len_mean: 718.69
  episode_media: {}
  episode_reward_max: 239.92613802489666
  episode_reward_mean: 91.22658851050029
  episode_reward_min: -140.86343634936821
  episodes_this_iter: 7
  episodes_total: 1296
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 7.888609169759553e-32
          cur_lr: 4.999999873689376e-05
          entropy: 0.8259251117706299
          entropy_coeff: 0.0
          kl: 0.0018803568091243505
          model: {}
          policy_loss: -0.0051664505153894424
          total_loss: 7.72358512878418
          vf_explained_var:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,112,1966.98,448000,91.2266,239.926,-140.863,718.69


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,112,1966.98,448000,91.2266,239.926,-140.863,718.69


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,112,1966.98,448000,91.2266,239.926,-140.863,718.69


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 452000
  counters:
    num_agent_steps_sampled: 452000
    num_agent_steps_trained: 452000
    num_env_steps_sampled: 452000
    num_env_steps_trained: 452000
  custom_metrics: {}
  date: 2022-07-23_21-42-34
  done: false
  episode_len_mean: 685.12
  episode_media: {}
  episode_reward_max: 239.92613802489666
  episode_reward_mean: 89.93340052278428
  episode_reward_min: -140.86343634936821
  episodes_this_iter: 7
  episodes_total: 1303
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 3.9443045848797766e-32
          cur_lr: 4.999999873689376e-05
          entropy: 0.7436686754226685
          entropy_coeff: 0.0
          kl: 0.004660534206777811
          model: {}
          policy_loss: -0.004518185742199421
          total_loss: 6.940357208251953
          vf_explained_var:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,113,1985.13,452000,89.9334,239.926,-140.863,685.12


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,113,1985.13,452000,89.9334,239.926,-140.863,685.12


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,113,1985.13,452000,89.9334,239.926,-140.863,685.12


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 456000
  counters:
    num_agent_steps_sampled: 456000
    num_agent_steps_trained: 456000
    num_env_steps_sampled: 456000
    num_env_steps_trained: 456000
  custom_metrics: {}
  date: 2022-07-23_21-42-52
  done: false
  episode_len_mean: 685.54
  episode_media: {}
  episode_reward_max: 239.92613802489666
  episode_reward_mean: 88.94456042786145
  episode_reward_min: -140.86343634936821
  episodes_this_iter: 6
  episodes_total: 1309
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.9721522924398883e-32
          cur_lr: 4.999999873689376e-05
          entropy: 0.7054013609886169
          entropy_coeff: 0.0
          kl: 0.006892811506986618
          model: {}
          policy_loss: -0.008165053091943264
          total_loss: 6.553400039672852
          vf_explained_var:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,114,2003.41,456000,88.9446,239.926,-140.863,685.54


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,114,2003.41,456000,88.9446,239.926,-140.863,685.54


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,114,2003.41,456000,88.9446,239.926,-140.863,685.54


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 460000
  counters:
    num_agent_steps_sampled: 460000
    num_agent_steps_trained: 460000
    num_env_steps_sampled: 460000
    num_env_steps_trained: 460000
  custom_metrics: {}
  date: 2022-07-23_21-43-10
  done: false
  episode_len_mean: 692.57
  episode_media: {}
  episode_reward_max: 239.92613802489666
  episode_reward_mean: 89.26316566229339
  episode_reward_min: -140.86343634936821
  episodes_this_iter: 4
  episodes_total: 1313
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.9721522924398883e-32
          cur_lr: 4.999999873689376e-05
          entropy: 0.7523353695869446
          entropy_coeff: 0.0
          kl: 0.006802257616072893
          model: {}
          policy_loss: -0.008593481965363026
          total_loss: 6.093634128570557
          vf_explained_var:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,115,2021.13,460000,89.2632,239.926,-140.863,692.57


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,115,2021.13,460000,89.2632,239.926,-140.863,692.57


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,115,2021.13,460000,89.2632,239.926,-140.863,692.57


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 464000
  counters:
    num_agent_steps_sampled: 464000
    num_agent_steps_trained: 464000
    num_env_steps_sampled: 464000
    num_env_steps_trained: 464000
  custom_metrics: {}
  date: 2022-07-23_21-43-29
  done: false
  episode_len_mean: 692.52
  episode_media: {}
  episode_reward_max: 239.92613802489666
  episode_reward_mean: 88.90905686099546
  episode_reward_min: -140.86343634936821
  episodes_this_iter: 5
  episodes_total: 1318
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.9721522924398883e-32
          cur_lr: 4.999999873689376e-05
          entropy: 0.8570346832275391
          entropy_coeff: 0.0
          kl: 0.004700908437371254
          model: {}
          policy_loss: -0.005855509079992771
          total_loss: 5.663057804107666
          vf_explained_var:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,116,2039.65,464000,88.9091,239.926,-140.863,692.52


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,116,2039.65,464000,88.9091,239.926,-140.863,692.52


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,116,2039.65,464000,88.9091,239.926,-140.863,692.52


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 468000
  counters:
    num_agent_steps_sampled: 468000
    num_agent_steps_trained: 468000
    num_env_steps_sampled: 468000
    num_env_steps_trained: 468000
  custom_metrics: {}
  date: 2022-07-23_21-43-48
  done: false
  episode_len_mean: 685.37
  episode_media: {}
  episode_reward_max: 239.92613802489666
  episode_reward_mean: 88.35619902596545
  episode_reward_min: -67.0005996276932
  episodes_this_iter: 6
  episodes_total: 1324
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 9.860761462199441e-33
          cur_lr: 4.999999873689376e-05
          entropy: 0.9010691046714783
          entropy_coeff: 0.0
          kl: 0.000636958284303546
          model: {}
          policy_loss: -0.0032323224004358053
          total_loss: 5.8669891357421875
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,117,2058.82,468000,88.3562,239.926,-67.0006,685.37


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,117,2058.82,468000,88.3562,239.926,-67.0006,685.37


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,117,2058.82,468000,88.3562,239.926,-67.0006,685.37


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 472000
  counters:
    num_agent_steps_sampled: 472000
    num_agent_steps_trained: 472000
    num_env_steps_sampled: 472000
    num_env_steps_trained: 472000
  custom_metrics: {}
  date: 2022-07-23_21-44-08
  done: false
  episode_len_mean: 689.43
  episode_media: {}
  episode_reward_max: 239.92613802489666
  episode_reward_mean: 90.36996043332664
  episode_reward_min: -80.19504759701215
  episodes_this_iter: 6
  episodes_total: 1330
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 4.930380731099721e-33
          cur_lr: 4.999999873689376e-05
          entropy: 0.8969773054122925
          entropy_coeff: 0.0
          kl: 0.0016068024560809135
          model: {}
          policy_loss: -0.005470752716064453
          total_loss: 6.441761016845703
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,118,2078.44,472000,90.37,239.926,-80.195,689.43


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,118,2078.44,472000,90.37,239.926,-80.195,689.43


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,118,2078.44,472000,90.37,239.926,-80.195,689.43


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 476000
  counters:
    num_agent_steps_sampled: 476000
    num_agent_steps_trained: 476000
    num_env_steps_sampled: 476000
    num_env_steps_trained: 476000
  custom_metrics: {}
  date: 2022-07-23_21-44-28
  done: false
  episode_len_mean: 677.21
  episode_media: {}
  episode_reward_max: 239.92613802489666
  episode_reward_mean: 91.49538601771508
  episode_reward_min: -80.19504759701215
  episodes_this_iter: 6
  episodes_total: 1336
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.4651903655498604e-33
          cur_lr: 4.999999873689376e-05
          entropy: 0.8185082077980042
          entropy_coeff: 0.0
          kl: 0.0008426823769696057
          model: {}
          policy_loss: -0.005193854216486216
          total_loss: 5.688602447509766
          vf_explained_var:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,119,2098.44,476000,91.4954,239.926,-80.195,677.21


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,119,2098.44,476000,91.4954,239.926,-80.195,677.21


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,119,2098.44,476000,91.4954,239.926,-80.195,677.21


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 480000
  counters:
    num_agent_steps_sampled: 480000
    num_agent_steps_trained: 480000
    num_env_steps_sampled: 480000
    num_env_steps_trained: 480000
  custom_metrics: {}
  date: 2022-07-23_21-44-46
  done: false
  episode_len_mean: 682.42
  episode_media: {}
  episode_reward_max: 239.92613802489666
  episode_reward_mean: 89.31996493003646
  episode_reward_min: -80.19504759701215
  episodes_this_iter: 5
  episodes_total: 1341
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.2325951827749302e-33
          cur_lr: 4.999999873689376e-05
          entropy: 0.8884109258651733
          entropy_coeff: 0.0
          kl: 0.0021926879417151213
          model: {}
          policy_loss: -0.004090970382094383
          total_loss: 4.8518385887146
          vf_explained_var: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,120,2116.82,480000,89.32,239.926,-80.195,682.42


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,120,2116.82,480000,89.32,239.926,-80.195,682.42


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,120,2116.82,480000,89.32,239.926,-80.195,682.42


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 484000
  counters:
    num_agent_steps_sampled: 484000
    num_agent_steps_trained: 484000
    num_env_steps_sampled: 484000
    num_env_steps_trained: 484000
  custom_metrics: {}
  date: 2022-07-23_21-45-06
  done: false
  episode_len_mean: 674.97
  episode_media: {}
  episode_reward_max: 239.92613802489666
  episode_reward_mean: 87.92031228920095
  episode_reward_min: -80.19504759701215
  episodes_this_iter: 5
  episodes_total: 1346
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 6.162975913874651e-34
          cur_lr: 4.999999873689376e-05
          entropy: 0.9109839797019958
          entropy_coeff: 0.0
          kl: 0.0025201765820384026
          model: {}
          policy_loss: -0.005924723576754332
          total_loss: 4.673896789550781
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,121,2136.35,484000,87.9203,239.926,-80.195,674.97


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,121,2136.35,484000,87.9203,239.926,-80.195,674.97


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,121,2136.35,484000,87.9203,239.926,-80.195,674.97


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 488000
  counters:
    num_agent_steps_sampled: 488000
    num_agent_steps_trained: 488000
    num_env_steps_sampled: 488000
    num_env_steps_trained: 488000
  custom_metrics: {}
  date: 2022-07-23_21-45-25
  done: false
  episode_len_mean: 649.48
  episode_media: {}
  episode_reward_max: 239.92613802489666
  episode_reward_mean: 86.9366117562031
  episode_reward_min: -80.19504759701215
  episodes_this_iter: 9
  episodes_total: 1355
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 3.0814879569373254e-34
          cur_lr: 4.999999873689376e-05
          entropy: 0.8349852561950684
          entropy_coeff: 0.0
          kl: 0.003993775229901075
          model: {}
          policy_loss: -0.006425175815820694
          total_loss: 7.535002708435059
          vf_explained_var: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,122,2155.13,488000,86.9366,239.926,-80.195,649.48


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,122,2155.13,488000,86.9366,239.926,-80.195,649.48


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,122,2155.13,488000,86.9366,239.926,-80.195,649.48


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 492000
  counters:
    num_agent_steps_sampled: 492000
    num_agent_steps_trained: 492000
    num_env_steps_sampled: 492000
    num_env_steps_trained: 492000
  custom_metrics: {}
  date: 2022-07-23_21-45-44
  done: false
  episode_len_mean: 649.54
  episode_media: {}
  episode_reward_max: 239.92613802489666
  episode_reward_mean: 88.0791713469854
  episode_reward_min: -80.19504759701215
  episodes_this_iter: 5
  episodes_total: 1360
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.5407439784686627e-34
          cur_lr: 4.999999873689376e-05
          entropy: 0.9152687191963196
          entropy_coeff: 0.0
          kl: 0.011216720566153526
          model: {}
          policy_loss: -0.006648744456470013
          total_loss: 5.159183025360107
          vf_explained_var: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,123,2174.35,492000,88.0792,239.926,-80.195,649.54


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,123,2174.35,492000,88.0792,239.926,-80.195,649.54


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,123,2174.35,492000,88.0792,239.926,-80.195,649.54


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 496000
  counters:
    num_agent_steps_sampled: 496000
    num_agent_steps_trained: 496000
    num_env_steps_sampled: 496000
    num_env_steps_trained: 496000
  custom_metrics: {}
  date: 2022-07-23_21-46-04
  done: false
  episode_len_mean: 658.63
  episode_media: {}
  episode_reward_max: 239.92613802489666
  episode_reward_mean: 89.995150726284
  episode_reward_min: -80.19504759701215
  episodes_this_iter: 5
  episodes_total: 1365
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.5407439784686627e-34
          cur_lr: 4.999999873689376e-05
          entropy: 0.9560779929161072
          entropy_coeff: 0.0
          kl: 0.002835572697222233
          model: {}
          policy_loss: -0.004228232428431511
          total_loss: 5.419404029846191
          vf_explained_var: 0.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,124,2193.96,496000,89.9952,239.926,-80.195,658.63


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,124,2193.96,496000,89.9952,239.926,-80.195,658.63


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,124,2193.96,496000,89.9952,239.926,-80.195,658.63


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 500000
  counters:
    num_agent_steps_sampled: 500000
    num_agent_steps_trained: 500000
    num_env_steps_sampled: 500000
    num_env_steps_trained: 500000
  custom_metrics: {}
  date: 2022-07-23_21-46-22
  done: false
  episode_len_mean: 683.34
  episode_media: {}
  episode_reward_max: 239.92613802489666
  episode_reward_mean: 94.224663383125
  episode_reward_min: -80.19504759701215
  episodes_this_iter: 5
  episodes_total: 1370
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 7.703719892343314e-35
          cur_lr: 4.999999873689376e-05
          entropy: 0.8858960270881653
          entropy_coeff: 0.0
          kl: 0.005649464670568705
          model: {}
          policy_loss: -0.005142916459590197
          total_loss: 5.351179599761963
          vf_explained_var: 0.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,125,2212.44,500000,94.2247,239.926,-80.195,683.34


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,125,2212.44,500000,94.2247,239.926,-80.195,683.34


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,125,2212.44,500000,94.2247,239.926,-80.195,683.34


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 504000
  counters:
    num_agent_steps_sampled: 504000
    num_agent_steps_trained: 504000
    num_env_steps_sampled: 504000
    num_env_steps_trained: 504000
  custom_metrics: {}
  date: 2022-07-23_21-46-41
  done: false
  episode_len_mean: 691.39
  episode_media: {}
  episode_reward_max: 239.92613802489666
  episode_reward_mean: 94.91609021544613
  episode_reward_min: -80.19504759701215
  episodes_this_iter: 5
  episodes_total: 1375
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 7.703719892343314e-35
          cur_lr: 4.999999873689376e-05
          entropy: 0.8991664052009583
          entropy_coeff: 0.0
          kl: 0.0007265393505804241
          model: {}
          policy_loss: 0.0028921172488480806
          total_loss: 4.394952774047852
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,126,2230.79,504000,94.9161,239.926,-80.195,691.39


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,126,2230.79,504000,94.9161,239.926,-80.195,691.39


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,126,2230.79,504000,94.9161,239.926,-80.195,691.39


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 508000
  counters:
    num_agent_steps_sampled: 508000
    num_agent_steps_trained: 508000
    num_env_steps_sampled: 508000
    num_env_steps_trained: 508000
  custom_metrics: {}
  date: 2022-07-23_21-46-59
  done: false
  episode_len_mean: 697.69
  episode_media: {}
  episode_reward_max: 239.92613802489666
  episode_reward_mean: 94.67325187344994
  episode_reward_min: -80.19504759701215
  episodes_this_iter: 4
  episodes_total: 1379
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 3.851859946171657e-35
          cur_lr: 4.999999873689376e-05
          entropy: 0.9293955564498901
          entropy_coeff: 0.0
          kl: 0.002462198957800865
          model: {}
          policy_loss: -0.0027504125609993935
          total_loss: 4.704528331756592
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,127,2249.27,508000,94.6733,239.926,-80.195,697.69


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,127,2249.27,508000,94.6733,239.926,-80.195,697.69


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,127,2249.27,508000,94.6733,239.926,-80.195,697.69


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 512000
  counters:
    num_agent_steps_sampled: 512000
    num_agent_steps_trained: 512000
    num_env_steps_sampled: 512000
    num_env_steps_trained: 512000
  custom_metrics: {}
  date: 2022-07-23_21-47-18
  done: false
  episode_len_mean: 728.4
  episode_media: {}
  episode_reward_max: 232.7217992925426
  episode_reward_mean: 97.53254775392872
  episode_reward_min: -80.19504759701215
  episodes_this_iter: 4
  episodes_total: 1383
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.9259299730858284e-35
          cur_lr: 4.999999873689376e-05
          entropy: 0.9069352746009827
          entropy_coeff: 0.0
          kl: 0.013151886872947216
          model: {}
          policy_loss: -0.008404293097555637
          total_loss: 3.674765110015869
          vf_explained_var: 0.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,128,2267.63,512000,97.5325,232.722,-80.195,728.4


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,128,2267.63,512000,97.5325,232.722,-80.195,728.4


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,128,2267.63,512000,97.5325,232.722,-80.195,728.4


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 516000
  counters:
    num_agent_steps_sampled: 516000
    num_agent_steps_trained: 516000
    num_env_steps_sampled: 516000
    num_env_steps_trained: 516000
  custom_metrics: {}
  date: 2022-07-23_21-47-36
  done: false
  episode_len_mean: 712.26
  episode_media: {}
  episode_reward_max: 232.7435659615084
  episode_reward_mean: 96.61277984850324
  episode_reward_min: -80.19504759701215
  episodes_this_iter: 7
  episodes_total: 1390
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.9259299730858284e-35
          cur_lr: 4.999999873689376e-05
          entropy: 0.8795217275619507
          entropy_coeff: 0.0
          kl: 0.0018657644977793097
          model: {}
          policy_loss: -0.002827115124091506
          total_loss: 6.383900165557861
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,129,2285.63,516000,96.6128,232.744,-80.195,712.26


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,129,2285.63,516000,96.6128,232.744,-80.195,712.26


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,129,2285.63,516000,96.6128,232.744,-80.195,712.26


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 520000
  counters:
    num_agent_steps_sampled: 520000
    num_agent_steps_trained: 520000
    num_env_steps_sampled: 520000
    num_env_steps_trained: 520000
  custom_metrics: {}
  date: 2022-07-23_21-47-54
  done: false
  episode_len_mean: 718.29
  episode_media: {}
  episode_reward_max: 232.7435659615084
  episode_reward_mean: 95.70783352582437
  episode_reward_min: -80.19504759701215
  episodes_this_iter: 6
  episodes_total: 1396
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 9.629649865429142e-36
          cur_lr: 4.999999873689376e-05
          entropy: 0.8908835649490356
          entropy_coeff: 0.0
          kl: 0.0017395269824191928
          model: {}
          policy_loss: -0.0033158501610159874
          total_loss: 6.448554039001465
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,130,2303.99,520000,95.7078,232.744,-80.195,718.29


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,130,2303.99,520000,95.7078,232.744,-80.195,718.29


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,130,2303.99,520000,95.7078,232.744,-80.195,718.29


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 524000
  counters:
    num_agent_steps_sampled: 524000
    num_agent_steps_trained: 524000
    num_env_steps_sampled: 524000
    num_env_steps_trained: 524000
  custom_metrics: {}
  date: 2022-07-23_21-48-12
  done: false
  episode_len_mean: 734.19
  episode_media: {}
  episode_reward_max: 232.7435659615084
  episode_reward_mean: 96.0418844796298
  episode_reward_min: -80.19504759701215
  episodes_this_iter: 6
  episodes_total: 1402
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 4.814824932714571e-36
          cur_lr: 4.999999873689376e-05
          entropy: 0.8703970313072205
          entropy_coeff: 0.0
          kl: 0.0020617973059415817
          model: {}
          policy_loss: -0.005248702596873045
          total_loss: 5.989431381225586
          vf_explained_var: 0.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,131,2322.15,524000,96.0419,232.744,-80.195,734.19


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,131,2322.15,524000,96.0419,232.744,-80.195,734.19


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,131,2322.15,524000,96.0419,232.744,-80.195,734.19


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 528000
  counters:
    num_agent_steps_sampled: 528000
    num_agent_steps_trained: 528000
    num_env_steps_sampled: 528000
    num_env_steps_trained: 528000
  custom_metrics: {}
  date: 2022-07-23_21-48-31
  done: false
  episode_len_mean: 741.53
  episode_media: {}
  episode_reward_max: 232.7435659615084
  episode_reward_mean: 94.47147989918346
  episode_reward_min: -80.19504759701215
  episodes_this_iter: 5
  episodes_total: 1407
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.4074124663572855e-36
          cur_lr: 4.999999873689376e-05
          entropy: 0.8034831881523132
          entropy_coeff: 0.0
          kl: 0.008010861463844776
          model: {}
          policy_loss: -0.005401119124144316
          total_loss: 4.9478254318237305
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,132,2341.05,528000,94.4715,232.744,-80.195,741.53


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,132,2341.05,528000,94.4715,232.744,-80.195,741.53


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,132,2341.05,528000,94.4715,232.744,-80.195,741.53


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 532000
  counters:
    num_agent_steps_sampled: 532000
    num_agent_steps_trained: 532000
    num_env_steps_sampled: 532000
    num_env_steps_trained: 532000
  custom_metrics: {}
  date: 2022-07-23_21-48-50
  done: false
  episode_len_mean: 719.23
  episode_media: {}
  episode_reward_max: 233.59161089158928
  episode_reward_mean: 97.34260452393958
  episode_reward_min: -80.19504759701215
  episodes_this_iter: 6
  episodes_total: 1413
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 2.4074124663572855e-36
          cur_lr: 4.999999873689376e-05
          entropy: 0.8507678508758545
          entropy_coeff: 0.0
          kl: 0.002073912648484111
          model: {}
          policy_loss: -0.006084071006625891
          total_loss: 5.358857154846191
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,133,2359.93,532000,97.3426,233.592,-80.195,719.23


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,133,2359.93,532000,97.3426,233.592,-80.195,719.23


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,133,2359.93,532000,97.3426,233.592,-80.195,719.23


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 536000
  counters:
    num_agent_steps_sampled: 536000
    num_agent_steps_trained: 536000
    num_env_steps_sampled: 536000
    num_env_steps_trained: 536000
  custom_metrics: {}
  date: 2022-07-23_21-49-09
  done: false
  episode_len_mean: 718.83
  episode_media: {}
  episode_reward_max: 233.59161089158928
  episode_reward_mean: 97.95766661410863
  episode_reward_min: -80.19504759701215
  episodes_this_iter: 5
  episodes_total: 1418
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.2037062331786428e-36
          cur_lr: 4.999999873689376e-05
          entropy: 0.9130333662033081
          entropy_coeff: 0.0
          kl: 0.001205815584398806
          model: {}
          policy_loss: -0.005407110787928104
          total_loss: 4.778907299041748
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,134,2378.61,536000,97.9577,233.592,-80.195,718.83


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,134,2378.61,536000,97.9577,233.592,-80.195,718.83


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,134,2378.61,536000,97.9577,233.592,-80.195,718.83


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 540000
  counters:
    num_agent_steps_sampled: 540000
    num_agent_steps_trained: 540000
    num_env_steps_sampled: 540000
    num_env_steps_trained: 540000
  custom_metrics: {}
  date: 2022-07-23_21-49-26
  done: false
  episode_len_mean: 725.68
  episode_media: {}
  episode_reward_max: 233.59161089158928
  episode_reward_mean: 100.32988893312236
  episode_reward_min: -80.19504759701215
  episodes_this_iter: 5
  episodes_total: 1423
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 6.018531165893214e-37
          cur_lr: 4.999999873689376e-05
          entropy: 0.9211582541465759
          entropy_coeff: 0.0
          kl: 0.004372166469693184
          model: {}
          policy_loss: -0.00414795707911253
          total_loss: 4.315662384033203
          vf_explained_var: 0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,135,2395.95,540000,100.33,233.592,-80.195,725.68


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,135,2395.95,540000,100.33,233.592,-80.195,725.68


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,135,2395.95,540000,100.33,233.592,-80.195,725.68


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 544000
  counters:
    num_agent_steps_sampled: 544000
    num_agent_steps_trained: 544000
    num_env_steps_sampled: 544000
    num_env_steps_trained: 544000
  custom_metrics: {}
  date: 2022-07-23_21-49-42
  done: false
  episode_len_mean: 716.36
  episode_media: {}
  episode_reward_max: 233.59161089158928
  episode_reward_mean: 100.65509557861839
  episode_reward_min: -80.19504759701215
  episodes_this_iter: 6
  episodes_total: 1429
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 3.009265582946607e-37
          cur_lr: 4.999999873689376e-05
          entropy: 0.8786079287528992
          entropy_coeff: 0.0
          kl: 0.009088529273867607
          model: {}
          policy_loss: -0.006468890700489283
          total_loss: 4.2949137687683105
          vf_explained_var:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,136,2411.95,544000,100.655,233.592,-80.195,716.36


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,136,2411.95,544000,100.655,233.592,-80.195,716.36


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,136,2411.95,544000,100.655,233.592,-80.195,716.36


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 548000
  counters:
    num_agent_steps_sampled: 548000
    num_agent_steps_trained: 548000
    num_env_steps_sampled: 548000
    num_env_steps_trained: 548000
  custom_metrics: {}
  date: 2022-07-23_21-49-59
  done: false
  episode_len_mean: 727.18
  episode_media: {}
  episode_reward_max: 233.59161089158928
  episode_reward_mean: 102.5225032854984
  episode_reward_min: -60.776728709916725
  episodes_this_iter: 5
  episodes_total: 1434
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 3.009265582946607e-37
          cur_lr: 4.999999873689376e-05
          entropy: 0.7957441806793213
          entropy_coeff: 0.0
          kl: 0.004478566814213991
          model: {}
          policy_loss: -0.004970775917172432
          total_loss: 4.173970699310303
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,137,2428.68,548000,102.523,233.592,-60.7767,727.18


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,137,2428.68,548000,102.523,233.592,-60.7767,727.18


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,137,2428.68,548000,102.523,233.592,-60.7767,727.18


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 552000
  counters:
    num_agent_steps_sampled: 552000
    num_agent_steps_trained: 552000
    num_env_steps_sampled: 552000
    num_env_steps_trained: 552000
  custom_metrics: {}
  date: 2022-07-23_21-50-15
  done: false
  episode_len_mean: 740.51
  episode_media: {}
  episode_reward_max: 233.59161089158928
  episode_reward_mean: 104.28279914456827
  episode_reward_min: -60.776728709916725
  episodes_this_iter: 5
  episodes_total: 1439
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.5046327914733034e-37
          cur_lr: 4.999999873689376e-05
          entropy: 0.8673669695854187
          entropy_coeff: 0.0
          kl: 0.000678840558975935
          model: {}
          policy_loss: -0.003326564561575651
          total_loss: 3.317737102508545
          vf_explained_var

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,138,2444.76,552000,104.283,233.592,-60.7767,740.51


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,138,2444.76,552000,104.283,233.592,-60.7767,740.51


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 556000
  counters:
    num_agent_steps_sampled: 556000
    num_agent_steps_trained: 556000
    num_env_steps_sampled: 556000
    num_env_steps_trained: 556000
  custom_metrics: {}
  date: 2022-07-23_21-50-31
  done: false
  episode_len_mean: 707.61
  episode_media: {}
  episode_reward_max: 233.59161089158928
  episode_reward_mean: 100.29459015176774
  episode_reward_min: -94.64388283527435
  episodes_this_iter: 8
  episodes_total: 1447
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 7.523163957366517e-38
          cur_lr: 4.999999873689376e-05
          entropy: 0.8150489330291748
          entropy_coeff: 0.0
          kl: 0.0031174516770988703
          model: {}
          policy_loss: -0.0035233739763498306
          total_loss: 6.852023601531982
          vf_explained_var

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,139,2459.92,556000,100.295,233.592,-94.6439,707.61


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,139,2459.92,556000,100.295,233.592,-94.6439,707.61


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,139,2459.92,556000,100.295,233.592,-94.6439,707.61


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,139,2459.92,556000,100.295,233.592,-94.6439,707.61


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 560000
  counters:
    num_agent_steps_sampled: 560000
    num_agent_steps_trained: 560000
    num_env_steps_sampled: 560000
    num_env_steps_trained: 560000
  custom_metrics: {}
  date: 2022-07-23_21-50-46
  done: false
  episode_len_mean: 711.12
  episode_media: {}
  episode_reward_max: 233.59161089158928
  episode_reward_mean: 101.34625093093358
  episode_reward_min: -94.64388283527435
  episodes_this_iter: 9
  episodes_total: 1456
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 3.7615819786832586e-38
          cur_lr: 4.999999873689376e-05
          entropy: 0.7735334038734436
          entropy_coeff: 0.0
          kl: 0.002355406526476145
          model: {}
          policy_loss: 0.009916340000927448
          total_loss: 6.204770565032959
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,140,2475.59,560000,101.346,233.592,-94.6439,711.12


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,140,2475.59,560000,101.346,233.592,-94.6439,711.12


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,140,2475.59,560000,101.346,233.592,-94.6439,711.12


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 564000
  counters:
    num_agent_steps_sampled: 564000
    num_agent_steps_trained: 564000
    num_env_steps_sampled: 564000
    num_env_steps_trained: 564000
  custom_metrics: {}
  date: 2022-07-23_21-51-02
  done: false
  episode_len_mean: 698.37
  episode_media: {}
  episode_reward_max: 233.59161089158928
  episode_reward_mean: 102.44804584304066
  episode_reward_min: -94.64388283527435
  episodes_this_iter: 6
  episodes_total: 1462
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 1.8807909893416293e-38
          cur_lr: 4.999999873689376e-05
          entropy: 0.7619532942771912
          entropy_coeff: 0.0
          kl: 0.004173393361270428
          model: {}
          policy_loss: -0.005805701483041048
          total_loss: 6.500481128692627
          vf_explained_var:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,141,2491.42,564000,102.448,233.592,-94.6439,698.37


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,141,2491.42,564000,102.448,233.592,-94.6439,698.37


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,141,2491.42,564000,102.448,233.592,-94.6439,698.37


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 568000
  counters:
    num_agent_steps_sampled: 568000
    num_agent_steps_trained: 568000
    num_env_steps_sampled: 568000
    num_env_steps_trained: 568000
  custom_metrics: {}
  date: 2022-07-23_21-51-19
  done: false
  episode_len_mean: 676.76
  episode_media: {}
  episode_reward_max: 233.59161089158928
  episode_reward_mean: 103.3410286834263
  episode_reward_min: -94.64388283527435
  episodes_this_iter: 6
  episodes_total: 1468
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7817189693450928
          entropy_coeff: 0.0
          kl: 0.0049225264228880405
          model: {}
          policy_loss: -0.004335145000368357
          total_loss: 5.6963090896606445
          vf_explained_var: 0.140818610787391

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,142,2507.86,568000,103.341,233.592,-94.6439,676.76


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,142,2507.86,568000,103.341,233.592,-94.6439,676.76


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 572000
  counters:
    num_agent_steps_sampled: 572000
    num_agent_steps_trained: 572000
    num_env_steps_sampled: 572000
    num_env_steps_trained: 572000
  custom_metrics: {}
  date: 2022-07-23_21-51-34
  done: false
  episode_len_mean: 674.76
  episode_media: {}
  episode_reward_max: 233.59161089158928
  episode_reward_mean: 103.3131930308959
  episode_reward_min: -94.64388283527435
  episodes_this_iter: 7
  episodes_total: 1475
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8556423187255859
          entropy_coeff: 0.0
          kl: 0.0013639626558870077
          model: {}
          policy_loss: -0.0030274628661572933
          total_loss: 5.286591053009033
          vf_explained_var: 0.097293399274349

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,143,2522.81,572000,103.313,233.592,-94.6439,674.76


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,143,2522.81,572000,103.313,233.592,-94.6439,674.76


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,143,2522.81,572000,103.313,233.592,-94.6439,674.76


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 576000
  counters:
    num_agent_steps_sampled: 576000
    num_agent_steps_trained: 576000
    num_env_steps_sampled: 576000
    num_env_steps_trained: 576000
  custom_metrics: {}
  date: 2022-07-23_21-51-49
  done: false
  episode_len_mean: 652.75
  episode_media: {}
  episode_reward_max: 233.59161089158928
  episode_reward_mean: 101.09517947407629
  episode_reward_min: -94.64388283527435
  episodes_this_iter: 6
  episodes_total: 1481
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8638918399810791
          entropy_coeff: 0.0
          kl: 0.002195246983319521
          model: {}
          policy_loss: -0.0016950590070337057
          total_loss: 5.207679271697998
          vf_explained_var: 0.156082361936569

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,144,2538.2,576000,101.095,233.592,-94.6439,652.75


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,144,2538.2,576000,101.095,233.592,-94.6439,652.75


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,144,2538.2,576000,101.095,233.592,-94.6439,652.75


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 580000
  counters:
    num_agent_steps_sampled: 580000
    num_agent_steps_trained: 580000
    num_env_steps_sampled: 580000
    num_env_steps_trained: 580000
  custom_metrics: {}
  date: 2022-07-23_21-52-05
  done: false
  episode_len_mean: 660.75
  episode_media: {}
  episode_reward_max: 233.59161089158928
  episode_reward_mean: 103.32647023785475
  episode_reward_min: -94.64388283527435
  episodes_this_iter: 7
  episodes_total: 1488
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8216578960418701
          entropy_coeff: 0.0
          kl: 0.0022049134131520987
          model: {}
          policy_loss: -0.004283952061086893
          total_loss: 5.355203151702881
          vf_explained_var: 0.113559849560260

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,145,2554.04,580000,103.326,233.592,-94.6439,660.75


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,145,2554.04,580000,103.326,233.592,-94.6439,660.75


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,145,2554.04,580000,103.326,233.592,-94.6439,660.75


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 584000
  counters:
    num_agent_steps_sampled: 584000
    num_agent_steps_trained: 584000
    num_env_steps_sampled: 584000
    num_env_steps_trained: 584000
  custom_metrics: {}
  date: 2022-07-23_21-52-20
  done: false
  episode_len_mean: 644.57
  episode_media: {}
  episode_reward_max: 233.59161089158928
  episode_reward_mean: 102.60055825629532
  episode_reward_min: -94.64388283527435
  episodes_this_iter: 7
  episodes_total: 1495
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8128715753555298
          entropy_coeff: 0.0
          kl: 0.0027732413727790117
          model: {}
          policy_loss: -0.004561914596706629
          total_loss: 5.9732818603515625
          vf_explained_var: 0.10469127446413

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,146,2569.29,584000,102.601,233.592,-94.6439,644.57


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,146,2569.29,584000,102.601,233.592,-94.6439,644.57


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,146,2569.29,584000,102.601,233.592,-94.6439,644.57


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 588000
  counters:
    num_agent_steps_sampled: 588000
    num_agent_steps_trained: 588000
    num_env_steps_sampled: 588000
    num_env_steps_trained: 588000
  custom_metrics: {}
  date: 2022-07-23_21-52-36
  done: false
  episode_len_mean: 644.64
  episode_media: {}
  episode_reward_max: 233.59161089158928
  episode_reward_mean: 103.22834047797545
  episode_reward_min: -94.64388283527435
  episodes_this_iter: 7
  episodes_total: 1502
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8522500395774841
          entropy_coeff: 0.0
          kl: 0.0017072096234187484
          model: {}
          policy_loss: -0.020205480977892876
          total_loss: 5.143322467803955
          vf_explained_var: 0.160344317555427

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,147,2585.17,588000,103.228,233.592,-94.6439,644.64


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,147,2585.17,588000,103.228,233.592,-94.6439,644.64


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,147,2585.17,588000,103.228,233.592,-94.6439,644.64


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 592000
  counters:
    num_agent_steps_sampled: 592000
    num_agent_steps_trained: 592000
    num_env_steps_sampled: 592000
    num_env_steps_trained: 592000
  custom_metrics: {}
  date: 2022-07-23_21-52-52
  done: false
  episode_len_mean: 661.0
  episode_media: {}
  episode_reward_max: 233.59161089158928
  episode_reward_mean: 106.04659595016827
  episode_reward_min: -94.64388283527435
  episodes_this_iter: 4
  episodes_total: 1506
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7866069078445435
          entropy_coeff: 0.0
          kl: 0.0032672963570803404
          model: {}
          policy_loss: -0.007504058536142111
          total_loss: 4.036984920501709
          vf_explained_var: 0.1461443156003952

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,148,2601.02,592000,106.047,233.592,-94.6439,661


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,148,2601.02,592000,106.047,233.592,-94.6439,661


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 596000
  counters:
    num_agent_steps_sampled: 596000
    num_agent_steps_trained: 596000
    num_env_steps_sampled: 596000
    num_env_steps_trained: 596000
  custom_metrics: {}
  date: 2022-07-23_21-53-07
  done: false
  episode_len_mean: 647.35
  episode_media: {}
  episode_reward_max: 218.1011016320127
  episode_reward_mean: 100.4805056148328
  episode_reward_min: -94.64388283527435
  episodes_this_iter: 6
  episodes_total: 1512
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7941520810127258
          entropy_coeff: 0.0
          kl: 0.0015171053819358349
          model: {}
          policy_loss: -0.00126096629537642
          total_loss: 6.1622633934021
          vf_explained_var: 0.10846678167581558
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,149,2615.77,596000,100.481,218.101,-94.6439,647.35


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,149,2615.77,596000,100.481,218.101,-94.6439,647.35


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 600000
  counters:
    num_agent_steps_sampled: 600000
    num_agent_steps_trained: 600000
    num_env_steps_sampled: 600000
    num_env_steps_trained: 600000
  custom_metrics: {}
  date: 2022-07-23_21-53-22
  done: false
  episode_len_mean: 655.14
  episode_media: {}
  episode_reward_max: 218.1011016320127
  episode_reward_mean: 101.5594201106103
  episode_reward_min: -94.64388283527435
  episodes_this_iter: 4
  episodes_total: 1516
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7671267986297607
          entropy_coeff: 0.0
          kl: 0.001021726755425334
          model: {}
          policy_loss: -0.0030438301619142294
          total_loss: 3.463106155395508
          vf_explained_var: 0.09894409030675888

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,150,2630.84,600000,101.559,218.101,-94.6439,655.14


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,150,2630.84,600000,101.559,218.101,-94.6439,655.14


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 604000
  counters:
    num_agent_steps_sampled: 604000
    num_agent_steps_trained: 604000
    num_env_steps_sampled: 604000
    num_env_steps_trained: 604000
  custom_metrics: {}
  date: 2022-07-23_21-53-37
  done: false
  episode_len_mean: 648.13
  episode_media: {}
  episode_reward_max: 218.1011016320127
  episode_reward_mean: 99.98401843810208
  episode_reward_min: -94.64388283527435
  episodes_this_iter: 6
  episodes_total: 1522
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8181473016738892
          entropy_coeff: 0.0
          kl: 0.002292655874043703
          model: {}
          policy_loss: -0.004239086993038654
          total_loss: 5.049550533294678
          vf_explained_var: 0.1853904128074646
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,151,2645.34,604000,99.984,218.101,-94.6439,648.13


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,151,2645.34,604000,99.984,218.101,-94.6439,648.13


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,151,2645.34,604000,99.984,218.101,-94.6439,648.13


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 608000
  counters:
    num_agent_steps_sampled: 608000
    num_agent_steps_trained: 608000
    num_env_steps_sampled: 608000
    num_env_steps_trained: 608000
  custom_metrics: {}
  date: 2022-07-23_21-53-53
  done: false
  episode_len_mean: 663.43
  episode_media: {}
  episode_reward_max: 244.1008016002857
  episode_reward_mean: 101.34031357015782
  episode_reward_min: -94.64388283527435
  episodes_this_iter: 5
  episodes_total: 1527
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7789772748947144
          entropy_coeff: 0.0
          kl: 0.0009985461365431547
          model: {}
          policy_loss: -0.005914207082241774
          total_loss: 4.8719305992126465
          vf_explained_var: 0.186922654509544

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,152,2661.64,608000,101.34,244.101,-94.6439,663.43


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,152,2661.64,608000,101.34,244.101,-94.6439,663.43


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,152,2661.64,608000,101.34,244.101,-94.6439,663.43


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 612000
  counters:
    num_agent_steps_sampled: 612000
    num_agent_steps_trained: 612000
    num_env_steps_sampled: 612000
    num_env_steps_trained: 612000
  custom_metrics: {}
  date: 2022-07-23_21-54-09
  done: false
  episode_len_mean: 663.4
  episode_media: {}
  episode_reward_max: 244.1008016002857
  episode_reward_mean: 102.0098306318892
  episode_reward_min: -94.64388283527435
  episodes_this_iter: 5
  episodes_total: 1532
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7772614359855652
          entropy_coeff: 0.0
          kl: 0.0016961442306637764
          model: {}
          policy_loss: -0.006545058451592922
          total_loss: 4.568973064422607
          vf_explained_var: 0.22284679114818573


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,153,2677.46,612000,102.01,244.101,-94.6439,663.4


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,153,2677.46,612000,102.01,244.101,-94.6439,663.4


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,153,2677.46,612000,102.01,244.101,-94.6439,663.4


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 616000
  counters:
    num_agent_steps_sampled: 616000
    num_agent_steps_trained: 616000
    num_env_steps_sampled: 616000
    num_env_steps_trained: 616000
  custom_metrics: {}
  date: 2022-07-23_21-54-25
  done: false
  episode_len_mean: 664.66
  episode_media: {}
  episode_reward_max: 244.1008016002857
  episode_reward_mean: 101.67238355482738
  episode_reward_min: -94.64388283527435
  episodes_this_iter: 5
  episodes_total: 1537
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.839768648147583
          entropy_coeff: 0.0
          kl: 0.005554588045924902
          model: {}
          policy_loss: -0.003551095724105835
          total_loss: 4.954401969909668
          vf_explained_var: 0.21106025576591492


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,154,2693.83,616000,101.672,244.101,-94.6439,664.66


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,154,2693.83,616000,101.672,244.101,-94.6439,664.66


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,154,2693.83,616000,101.672,244.101,-94.6439,664.66


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 620000
  counters:
    num_agent_steps_sampled: 620000
    num_agent_steps_trained: 620000
    num_env_steps_sampled: 620000
    num_env_steps_trained: 620000
  custom_metrics: {}
  date: 2022-07-23_21-54-41
  done: false
  episode_len_mean: 645.74
  episode_media: {}
  episode_reward_max: 244.1008016002857
  episode_reward_mean: 104.2402296836226
  episode_reward_min: -57.67607976176838
  episodes_this_iter: 8
  episodes_total: 1545
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7269980311393738
          entropy_coeff: 0.0
          kl: 0.001989021897315979
          model: {}
          policy_loss: -0.006229679100215435
          total_loss: 6.5962090492248535
          vf_explained_var: 0.0862138420343399


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,155,2709.13,620000,104.24,244.101,-57.6761,645.74


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,155,2709.13,620000,104.24,244.101,-57.6761,645.74


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,155,2709.13,620000,104.24,244.101,-57.6761,645.74


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 624000
  counters:
    num_agent_steps_sampled: 624000
    num_agent_steps_trained: 624000
    num_env_steps_sampled: 624000
    num_env_steps_trained: 624000
  custom_metrics: {}
  date: 2022-07-23_21-54-57
  done: false
  episode_len_mean: 670.67
  episode_media: {}
  episode_reward_max: 244.1008016002857
  episode_reward_mean: 108.14722950544048
  episode_reward_min: -57.67607976176838
  episodes_this_iter: 7
  episodes_total: 1552
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.6890901923179626
          entropy_coeff: 0.0
          kl: 0.0033143269829452038
          model: {}
          policy_loss: -0.00029011964215897024
          total_loss: 5.016217231750488
          vf_explained_var: 0.10737083107233

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,156,2725.24,624000,108.147,244.101,-57.6761,670.67


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,156,2725.24,624000,108.147,244.101,-57.6761,670.67


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,156,2725.24,624000,108.147,244.101,-57.6761,670.67


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 628000
  counters:
    num_agent_steps_sampled: 628000
    num_agent_steps_trained: 628000
    num_env_steps_sampled: 628000
    num_env_steps_trained: 628000
  custom_metrics: {}
  date: 2022-07-23_21-55-13
  done: false
  episode_len_mean: 684.23
  episode_media: {}
  episode_reward_max: 244.1008016002857
  episode_reward_mean: 109.20076669675962
  episode_reward_min: -57.67607976176838
  episodes_this_iter: 4
  episodes_total: 1556
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8364939093589783
          entropy_coeff: 0.0
          kl: 0.005639976356178522
          model: {}
          policy_loss: -0.010501698590815067
          total_loss: 5.220837593078613
          vf_explained_var: 0.21633665263652802

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,157,2741.32,628000,109.201,244.101,-57.6761,684.23


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,157,2741.32,628000,109.201,244.101,-57.6761,684.23


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,157,2741.32,628000,109.201,244.101,-57.6761,684.23


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 632000
  counters:
    num_agent_steps_sampled: 632000
    num_agent_steps_trained: 632000
    num_env_steps_sampled: 632000
    num_env_steps_trained: 632000
  custom_metrics: {}
  date: 2022-07-23_21-55-28
  done: false
  episode_len_mean: 679.96
  episode_media: {}
  episode_reward_max: 244.1008016002857
  episode_reward_mean: 109.5306742719777
  episode_reward_min: -57.67607976176838
  episodes_this_iter: 5
  episodes_total: 1561
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.778992772102356
          entropy_coeff: 0.0
          kl: 0.00047326830099336803
          model: {}
          policy_loss: -0.0007372151012532413
          total_loss: 6.154269218444824
          vf_explained_var: 0.2088911533355713

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,158,2756.62,632000,109.531,244.101,-57.6761,679.96


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,158,2756.62,632000,109.531,244.101,-57.6761,679.96


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,158,2756.62,632000,109.531,244.101,-57.6761,679.96


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 636000
  counters:
    num_agent_steps_sampled: 636000
    num_agent_steps_trained: 636000
    num_env_steps_sampled: 636000
    num_env_steps_trained: 636000
  custom_metrics: {}
  date: 2022-07-23_21-55-44
  done: false
  episode_len_mean: 693.29
  episode_media: {}
  episode_reward_max: 244.1008016002857
  episode_reward_mean: 111.55651820468354
  episode_reward_min: -57.67607976176838
  episodes_this_iter: 4
  episodes_total: 1565
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8409964442253113
          entropy_coeff: 0.0
          kl: 0.006104523781687021
          model: {}
          policy_loss: -0.006975733209401369
          total_loss: 4.481256484985352
          vf_explained_var: 0.10087689757347107

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,159,2772.76,636000,111.557,244.101,-57.6761,693.29


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,159,2772.76,636000,111.557,244.101,-57.6761,693.29


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,159,2772.76,636000,111.557,244.101,-57.6761,693.29


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 640000
  counters:
    num_agent_steps_sampled: 640000
    num_agent_steps_trained: 640000
    num_env_steps_sampled: 640000
    num_env_steps_trained: 640000
  custom_metrics: {}
  date: 2022-07-23_21-56-01
  done: false
  episode_len_mean: 709.7
  episode_media: {}
  episode_reward_max: 244.1008016002857
  episode_reward_mean: 111.71704287871093
  episode_reward_min: -57.67607976176838
  episodes_this_iter: 4
  episodes_total: 1569
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7965477108955383
          entropy_coeff: 0.0
          kl: 0.0036828049924224615
          model: {}
          policy_loss: -0.006046204827725887
          total_loss: 4.394036293029785
          vf_explained_var: 0.14340613782405853

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,160,2788.92,640000,111.717,244.101,-57.6761,709.7


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,160,2788.92,640000,111.717,244.101,-57.6761,709.7


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,160,2788.92,640000,111.717,244.101,-57.6761,709.7


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 644000
  counters:
    num_agent_steps_sampled: 644000
    num_agent_steps_trained: 644000
    num_env_steps_sampled: 644000
    num_env_steps_trained: 644000
  custom_metrics: {}
  date: 2022-07-23_21-56-17
  done: false
  episode_len_mean: 724.66
  episode_media: {}
  episode_reward_max: 244.1008016002857
  episode_reward_mean: 112.8887192827654
  episode_reward_min: -35.89648689971068
  episodes_this_iter: 5
  episodes_total: 1574
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8573205471038818
          entropy_coeff: 0.0
          kl: 0.0014729539398103952
          model: {}
          policy_loss: -0.002287492621690035
          total_loss: 5.32170295715332
          vf_explained_var: 0.202792689204216
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,161,2804.87,644000,112.889,244.101,-35.8965,724.66


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,161,2804.87,644000,112.889,244.101,-35.8965,724.66


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,161,2804.87,644000,112.889,244.101,-35.8965,724.66


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 648000
  counters:
    num_agent_steps_sampled: 648000
    num_agent_steps_trained: 648000
    num_env_steps_sampled: 648000
    num_env_steps_trained: 648000
  custom_metrics: {}
  date: 2022-07-23_21-56-33
  done: false
  episode_len_mean: 717.54
  episode_media: {}
  episode_reward_max: 244.1008016002857
  episode_reward_mean: 109.64064672951028
  episode_reward_min: -36.10214031784248
  episodes_this_iter: 8
  episodes_total: 1582
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8320215940475464
          entropy_coeff: 0.0
          kl: 0.003086024895310402
          model: {}
          policy_loss: 0.020048627629876137
          total_loss: 4.894983291625977
          vf_explained_var: 0.126857727766037
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,162,2821,648000,109.641,244.101,-36.1021,717.54


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,162,2821,648000,109.641,244.101,-36.1021,717.54


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,162,2821,648000,109.641,244.101,-36.1021,717.54


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 652000
  counters:
    num_agent_steps_sampled: 652000
    num_agent_steps_trained: 652000
    num_env_steps_sampled: 652000
    num_env_steps_trained: 652000
  custom_metrics: {}
  date: 2022-07-23_21-56-49
  done: false
  episode_len_mean: 724.45
  episode_media: {}
  episode_reward_max: 244.1008016002857
  episode_reward_mean: 108.15650861247542
  episode_reward_min: -36.10214031784248
  episodes_this_iter: 5
  episodes_total: 1587
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8286088109016418
          entropy_coeff: 0.0
          kl: 0.0030365583952516317
          model: {}
          policy_loss: -0.005652022548019886
          total_loss: 4.769936561584473
          vf_explained_var: 0.1626320630311966

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,163,2837.29,652000,108.157,244.101,-36.1021,724.45


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,163,2837.29,652000,108.157,244.101,-36.1021,724.45


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,163,2837.29,652000,108.157,244.101,-36.1021,724.45


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 656000
  counters:
    num_agent_steps_sampled: 656000
    num_agent_steps_trained: 656000
    num_env_steps_sampled: 656000
    num_env_steps_trained: 656000
  custom_metrics: {}
  date: 2022-07-23_21-57-06
  done: false
  episode_len_mean: 732.43
  episode_media: {}
  episode_reward_max: 244.1008016002857
  episode_reward_mean: 106.35718103828134
  episode_reward_min: -36.10214031784248
  episodes_this_iter: 5
  episodes_total: 1592
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8691705465316772
          entropy_coeff: 0.0
          kl: 0.004640595521777868
          model: {}
          policy_loss: -0.0034383246675133705
          total_loss: 5.0811052322387695
          vf_explained_var: 0.222192764282226

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,164,2853.9,656000,106.357,244.101,-36.1021,732.43


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,164,2853.9,656000,106.357,244.101,-36.1021,732.43


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,164,2853.9,656000,106.357,244.101,-36.1021,732.43


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 660000
  counters:
    num_agent_steps_sampled: 660000
    num_agent_steps_trained: 660000
    num_env_steps_sampled: 660000
    num_env_steps_trained: 660000
  custom_metrics: {}
  date: 2022-07-23_21-57-22
  done: false
  episode_len_mean: 740.36
  episode_media: {}
  episode_reward_max: 244.1008016002857
  episode_reward_mean: 106.57046873856187
  episode_reward_min: -36.10214031784248
  episodes_this_iter: 5
  episodes_total: 1597
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8772644996643066
          entropy_coeff: 0.0
          kl: 0.014309667982161045
          model: {}
          policy_loss: -0.0057107084430754185
          total_loss: 4.97708797454834
          vf_explained_var: 0.17396648228168488

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,165,2870.2,660000,106.57,244.101,-36.1021,740.36


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,165,2870.2,660000,106.57,244.101,-36.1021,740.36


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,165,2870.2,660000,106.57,244.101,-36.1021,740.36


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 664000
  counters:
    num_agent_steps_sampled: 664000
    num_agent_steps_trained: 664000
    num_env_steps_sampled: 664000
    num_env_steps_trained: 664000
  custom_metrics: {}
  date: 2022-07-23_21-57-38
  done: false
  episode_len_mean: 755.72
  episode_media: {}
  episode_reward_max: 244.1008016002857
  episode_reward_mean: 107.61040364359111
  episode_reward_min: -36.10214031784248
  episodes_this_iter: 4
  episodes_total: 1601
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8346498608589172
          entropy_coeff: 0.0
          kl: 0.004684052895754576
          model: {}
          policy_loss: -0.004596785642206669
          total_loss: 4.415730953216553
          vf_explained_var: 0.16227923333644867

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,166,2886.1,664000,107.61,244.101,-36.1021,755.72


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,166,2886.1,664000,107.61,244.101,-36.1021,755.72


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,166,2886.1,664000,107.61,244.101,-36.1021,755.72


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 668000
  counters:
    num_agent_steps_sampled: 668000
    num_agent_steps_trained: 668000
    num_env_steps_sampled: 668000
    num_env_steps_trained: 668000
  custom_metrics: {}
  date: 2022-07-23_21-57-55
  done: false
  episode_len_mean: 755.13
  episode_media: {}
  episode_reward_max: 244.1008016002857
  episode_reward_mean: 110.92201849018201
  episode_reward_min: -36.10214031784248
  episodes_this_iter: 5
  episodes_total: 1606
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.787311315536499
          entropy_coeff: 0.0
          kl: 0.0038258321583271027
          model: {}
          policy_loss: 0.01055991742759943
          total_loss: 4.223740100860596
          vf_explained_var: 0.08725886046886444
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,167,2902.45,668000,110.922,244.101,-36.1021,755.13


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,167,2902.45,668000,110.922,244.101,-36.1021,755.13


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,167,2902.45,668000,110.922,244.101,-36.1021,755.13


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 672000
  counters:
    num_agent_steps_sampled: 672000
    num_agent_steps_trained: 672000
    num_env_steps_sampled: 672000
    num_env_steps_trained: 672000
  custom_metrics: {}
  date: 2022-07-23_21-58-11
  done: false
  episode_len_mean: 768.96
  episode_media: {}
  episode_reward_max: 244.1008016002857
  episode_reward_mean: 111.5504485270987
  episode_reward_min: -36.10214031784248
  episodes_this_iter: 4
  episodes_total: 1610
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8401350975036621
          entropy_coeff: 0.0
          kl: 0.011695459485054016
          model: {}
          policy_loss: 0.0014506069710478187
          total_loss: 4.237126350402832
          vf_explained_var: 0.20667479932308197


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,168,2918.63,672000,111.55,244.101,-36.1021,768.96


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,168,2918.63,672000,111.55,244.101,-36.1021,768.96


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,168,2918.63,672000,111.55,244.101,-36.1021,768.96


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 676000
  counters:
    num_agent_steps_sampled: 676000
    num_agent_steps_trained: 676000
    num_env_steps_sampled: 676000
    num_env_steps_trained: 676000
  custom_metrics: {}
  date: 2022-07-23_21-58-26
  done: false
  episode_len_mean: 774.97
  episode_media: {}
  episode_reward_max: 244.1008016002857
  episode_reward_mean: 113.3934045881076
  episode_reward_min: -36.10214031784248
  episodes_this_iter: 6
  episodes_total: 1616
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.772972583770752
          entropy_coeff: 0.0
          kl: 0.0007236896781250834
          model: {}
          policy_loss: 0.0013167798751965165
          total_loss: 5.275199890136719
          vf_explained_var: 0.14238539338111877


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,169,2934.03,676000,113.393,244.101,-36.1021,774.97


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,169,2934.03,676000,113.393,244.101,-36.1021,774.97


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,169,2934.03,676000,113.393,244.101,-36.1021,774.97


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 680000
  counters:
    num_agent_steps_sampled: 680000
    num_agent_steps_trained: 680000
    num_env_steps_sampled: 680000
    num_env_steps_trained: 680000
  custom_metrics: {}
  date: 2022-07-23_21-58-42
  done: false
  episode_len_mean: 779.27
  episode_media: {}
  episode_reward_max: 244.1008016002857
  episode_reward_mean: 117.60131466532489
  episode_reward_min: -36.10214031784248
  episodes_this_iter: 5
  episodes_total: 1621
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7803975939750671
          entropy_coeff: 0.0
          kl: 0.001563984784297645
          model: {}
          policy_loss: -0.001843535341322422
          total_loss: 4.49195671081543
          vf_explained_var: 0.09409010410308838


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,170,2949.62,680000,117.601,244.101,-36.1021,779.27


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,170,2949.62,680000,117.601,244.101,-36.1021,779.27


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,170,2949.62,680000,117.601,244.101,-36.1021,779.27


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 684000
  counters:
    num_agent_steps_sampled: 684000
    num_agent_steps_trained: 684000
    num_env_steps_sampled: 684000
    num_env_steps_trained: 684000
  custom_metrics: {}
  date: 2022-07-23_21-58-58
  done: false
  episode_len_mean: 785.87
  episode_media: {}
  episode_reward_max: 241.8606047838683
  episode_reward_mean: 116.82197678012636
  episode_reward_min: -36.10214031784248
  episodes_this_iter: 4
  episodes_total: 1625
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.823503851890564
          entropy_coeff: 0.0
          kl: 0.005044733174145222
          model: {}
          policy_loss: -0.004522891715168953
          total_loss: 5.160323143005371
          vf_explained_var: 0.17007604241371155


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,171,2965.26,684000,116.822,241.861,-36.1021,785.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,171,2965.26,684000,116.822,241.861,-36.1021,785.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,171,2965.26,684000,116.822,241.861,-36.1021,785.87


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 688000
  counters:
    num_agent_steps_sampled: 688000
    num_agent_steps_trained: 688000
    num_env_steps_sampled: 688000
    num_env_steps_trained: 688000
  custom_metrics: {}
  date: 2022-07-23_21-59-14
  done: false
  episode_len_mean: 785.87
  episode_media: {}
  episode_reward_max: 241.8606047838683
  episode_reward_mean: 116.99948842237082
  episode_reward_min: -36.10214031784248
  episodes_this_iter: 4
  episodes_total: 1629
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8154122233390808
          entropy_coeff: 0.0
          kl: 0.0009536804864183068
          model: {}
          policy_loss: -0.0007500676438212395
          total_loss: 3.878143787384033
          vf_explained_var: 0.149565622210502

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,172,2981.29,688000,116.999,241.861,-36.1021,785.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,172,2981.29,688000,116.999,241.861,-36.1021,785.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,172,2981.29,688000,116.999,241.861,-36.1021,785.87


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 692000
  counters:
    num_agent_steps_sampled: 692000
    num_agent_steps_trained: 692000
    num_env_steps_sampled: 692000
    num_env_steps_trained: 692000
  custom_metrics: {}
  date: 2022-07-23_21-59-29
  done: false
  episode_len_mean: 787.74
  episode_media: {}
  episode_reward_max: 241.8606047838683
  episode_reward_mean: 120.88860888937474
  episode_reward_min: -36.10214031784248
  episodes_this_iter: 5
  episodes_total: 1634
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7941100597381592
          entropy_coeff: 0.0
          kl: 0.005389729980379343
          model: {}
          policy_loss: -0.0060229976661503315
          total_loss: 4.222377300262451
          vf_explained_var: 0.1096520572900772

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,173,2996.62,692000,120.889,241.861,-36.1021,787.74


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,173,2996.62,692000,120.889,241.861,-36.1021,787.74


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,173,2996.62,692000,120.889,241.861,-36.1021,787.74


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 696000
  counters:
    num_agent_steps_sampled: 696000
    num_agent_steps_trained: 696000
    num_env_steps_sampled: 696000
    num_env_steps_trained: 696000
  custom_metrics: {}
  date: 2022-07-23_21-59-45
  done: false
  episode_len_mean: 787.74
  episode_media: {}
  episode_reward_max: 241.8606047838683
  episode_reward_mean: 120.83251869908982
  episode_reward_min: -36.10214031784248
  episodes_this_iter: 4
  episodes_total: 1638
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.805518388748169
          entropy_coeff: 0.0
          kl: 0.005731487646698952
          model: {}
          policy_loss: -0.005733089987188578
          total_loss: 3.648496389389038
          vf_explained_var: 0.15053576231002808


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,174,3012.06,696000,120.833,241.861,-36.1021,787.74


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,174,3012.06,696000,120.833,241.861,-36.1021,787.74


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,174,3012.06,696000,120.833,241.861,-36.1021,787.74


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 700000
  counters:
    num_agent_steps_sampled: 700000
    num_agent_steps_trained: 700000
    num_env_steps_sampled: 700000
    num_env_steps_trained: 700000
  custom_metrics: {}
  date: 2022-07-23_22-00-02
  done: false
  episode_len_mean: 814.49
  episode_media: {}
  episode_reward_max: 241.8606047838683
  episode_reward_mean: 120.0155334421659
  episode_reward_min: -36.10214031784248
  episodes_this_iter: 4
  episodes_total: 1642
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8111610412597656
          entropy_coeff: 0.0
          kl: 0.009643945842981339
          model: {}
          policy_loss: -0.005573376081883907
          total_loss: 3.721184015274048
          vf_explained_var: 0.09874472767114639


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,175,3029.45,700000,120.016,241.861,-36.1021,814.49


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,175,3029.45,700000,120.016,241.861,-36.1021,814.49


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,175,3029.45,700000,120.016,241.861,-36.1021,814.49


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,175,3029.45,700000,120.016,241.861,-36.1021,814.49


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 704000
  counters:
    num_agent_steps_sampled: 704000
    num_agent_steps_trained: 704000
    num_env_steps_sampled: 704000
    num_env_steps_trained: 704000
  custom_metrics: {}
  date: 2022-07-23_22-00-23
  done: false
  episode_len_mean: 831.7
  episode_media: {}
  episode_reward_max: 241.8606047838683
  episode_reward_mean: 122.55206419905079
  episode_reward_min: -36.10214031784248
  episodes_this_iter: 4
  episodes_total: 1646
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.801910400390625
          entropy_coeff: 0.0
          kl: 0.015542243607342243
          model: {}
          policy_loss: -0.006884726695716381
          total_loss: 3.706914186477661
          vf_explained_var: 0.15061329305171967
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,176,3049.92,704000,122.552,241.861,-36.1021,831.7


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,176,3049.92,704000,122.552,241.861,-36.1021,831.7


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,176,3049.92,704000,122.552,241.861,-36.1021,831.7


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 708000
  counters:
    num_agent_steps_sampled: 708000
    num_agent_steps_trained: 708000
    num_env_steps_sampled: 708000
    num_env_steps_trained: 708000
  custom_metrics: {}
  date: 2022-07-23_22-00-41
  done: false
  episode_len_mean: 845.55
  episode_media: {}
  episode_reward_max: 241.856816165528
  episode_reward_mean: 123.67990979474688
  episode_reward_min: -36.10214031784248
  episodes_this_iter: 5
  episodes_total: 1651
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7553717494010925
          entropy_coeff: 0.0
          kl: 0.0012324819108471274
          model: {}
          policy_loss: -0.0053042639046907425
          total_loss: 4.452788829803467
          vf_explained_var: 0.0847673341631889

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,177,3068.76,708000,123.68,241.857,-36.1021,845.55


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,177,3068.76,708000,123.68,241.857,-36.1021,845.55


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,177,3068.76,708000,123.68,241.857,-36.1021,845.55


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 712000
  counters:
    num_agent_steps_sampled: 712000
    num_agent_steps_trained: 712000
    num_env_steps_sampled: 712000
    num_env_steps_trained: 712000
  custom_metrics: {}
  date: 2022-07-23_22-00-59
  done: false
  episode_len_mean: 845.55
  episode_media: {}
  episode_reward_max: 241.856816165528
  episode_reward_mean: 123.81860645958517
  episode_reward_min: -36.10214031784248
  episodes_this_iter: 4
  episodes_total: 1655
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8139297962188721
          entropy_coeff: 0.0
          kl: 0.011149322614073753
          model: {}
          policy_loss: -0.006247945129871368
          total_loss: 4.051922798156738
          vf_explained_var: 0.19944220781326294


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,178,3086.42,712000,123.819,241.857,-36.1021,845.55


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,178,3086.42,712000,123.819,241.857,-36.1021,845.55


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,178,3086.42,712000,123.819,241.857,-36.1021,845.55


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 716000
  counters:
    num_agent_steps_sampled: 716000
    num_agent_steps_trained: 716000
    num_env_steps_sampled: 716000
    num_env_steps_trained: 716000
  custom_metrics: {}
  date: 2022-07-23_22-01-18
  done: false
  episode_len_mean: 856.33
  episode_media: {}
  episode_reward_max: 238.9184039869169
  episode_reward_mean: 121.8555330413837
  episode_reward_min: -36.10214031784248
  episodes_this_iter: 4
  episodes_total: 1659
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.815305233001709
          entropy_coeff: 0.0
          kl: 0.008081943728029728
          model: {}
          policy_loss: -0.00673999497666955
          total_loss: 4.087226867675781
          vf_explained_var: 0.1480380743741989
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,179,3104.88,716000,121.856,238.918,-36.1021,856.33


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,179,3104.88,716000,121.856,238.918,-36.1021,856.33


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,179,3104.88,716000,121.856,238.918,-36.1021,856.33


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 720000
  counters:
    num_agent_steps_sampled: 720000
    num_agent_steps_trained: 720000
    num_env_steps_sampled: 720000
    num_env_steps_trained: 720000
  custom_metrics: {}
  date: 2022-07-23_22-01-35
  done: false
  episode_len_mean: 863.8
  episode_media: {}
  episode_reward_max: 238.9184039869169
  episode_reward_mean: 123.46826077259489
  episode_reward_min: -36.10214031784248
  episodes_this_iter: 4
  episodes_total: 1663
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7984464168548584
          entropy_coeff: 0.0
          kl: 0.007719799410551786
          model: {}
          policy_loss: -0.011221954599022865
          total_loss: 3.8714425563812256
          vf_explained_var: 0.19149154424667358

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,180,3122.32,720000,123.468,238.918,-36.1021,863.8


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,180,3122.32,720000,123.468,238.918,-36.1021,863.8


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,180,3122.32,720000,123.468,238.918,-36.1021,863.8


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 724000
  counters:
    num_agent_steps_sampled: 724000
    num_agent_steps_trained: 724000
    num_env_steps_sampled: 724000
    num_env_steps_trained: 724000
  custom_metrics: {}
  date: 2022-07-23_22-01-53
  done: false
  episode_len_mean: 869.66
  episode_media: {}
  episode_reward_max: 238.9184039869169
  episode_reward_mean: 123.54507295551171
  episode_reward_min: -36.10214031784248
  episodes_this_iter: 4
  episodes_total: 1667
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.747336745262146
          entropy_coeff: 0.0
          kl: 0.004101435653865337
          model: {}
          policy_loss: -0.00632408307865262
          total_loss: 4.007336616516113
          vf_explained_var: 0.1274033933877945
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,181,3139.59,724000,123.545,238.918,-36.1021,869.66


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,181,3139.59,724000,123.545,238.918,-36.1021,869.66


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,181,3139.59,724000,123.545,238.918,-36.1021,869.66


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 728000
  counters:
    num_agent_steps_sampled: 728000
    num_agent_steps_trained: 728000
    num_env_steps_sampled: 728000
    num_env_steps_trained: 728000
  custom_metrics: {}
  date: 2022-07-23_22-02-10
  done: false
  episode_len_mean: 863.92
  episode_media: {}
  episode_reward_max: 251.17267818858952
  episode_reward_mean: 125.41223442904229
  episode_reward_min: -36.10214031784248
  episodes_this_iter: 5
  episodes_total: 1672
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7247182130813599
          entropy_coeff: 0.0
          kl: 0.0014329655095934868
          model: {}
          policy_loss: -0.0014120214618742466
          total_loss: 5.031916618347168
          vf_explained_var: 0.09749601781368

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,182,3156.94,728000,125.412,251.173,-36.1021,863.92


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,182,3156.94,728000,125.412,251.173,-36.1021,863.92


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,182,3156.94,728000,125.412,251.173,-36.1021,863.92


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 732000
  counters:
    num_agent_steps_sampled: 732000
    num_agent_steps_trained: 732000
    num_env_steps_sampled: 732000
    num_env_steps_trained: 732000
  custom_metrics: {}
  date: 2022-07-23_22-02-27
  done: false
  episode_len_mean: 870.52
  episode_media: {}
  episode_reward_max: 251.17267818858952
  episode_reward_mean: 126.59615914080342
  episode_reward_min: -36.10214031784248
  episodes_this_iter: 5
  episodes_total: 1677
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7676700353622437
          entropy_coeff: 0.0
          kl: 0.0018065859330818057
          model: {}
          policy_loss: -0.004190054722130299
          total_loss: 5.9533586502075195
          vf_explained_var: 0.07922996580600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,183,3173.78,732000,126.596,251.173,-36.1021,870.52


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,183,3173.78,732000,126.596,251.173,-36.1021,870.52


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,183,3173.78,732000,126.596,251.173,-36.1021,870.52


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 736000
  counters:
    num_agent_steps_sampled: 736000
    num_agent_steps_trained: 736000
    num_env_steps_sampled: 736000
    num_env_steps_trained: 736000
  custom_metrics: {}
  date: 2022-07-23_22-02-43
  done: false
  episode_len_mean: 878.11
  episode_media: {}
  episode_reward_max: 251.17267818858952
  episode_reward_mean: 129.1268737808453
  episode_reward_min: -23.32560349962847
  episodes_this_iter: 5
  episodes_total: 1682
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.6872246265411377
          entropy_coeff: 0.0
          kl: 0.0026900460943579674
          model: {}
          policy_loss: -0.0030812995973974466
          total_loss: 5.816677093505859
          vf_explained_var: 0.059297915548086

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,184,3190.29,736000,129.127,251.173,-23.3256,878.11


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,184,3190.29,736000,129.127,251.173,-23.3256,878.11


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,184,3190.29,736000,129.127,251.173,-23.3256,878.11


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 740000
  counters:
    num_agent_steps_sampled: 740000
    num_agent_steps_trained: 740000
    num_env_steps_sampled: 740000
    num_env_steps_trained: 740000
  custom_metrics: {}
  date: 2022-07-23_22-03-01
  done: false
  episode_len_mean: 893.37
  episode_media: {}
  episode_reward_max: 251.17267818858952
  episode_reward_mean: 131.65607418117207
  episode_reward_min: -23.32560349962847
  episodes_this_iter: 4
  episodes_total: 1686
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.782190203666687
          entropy_coeff: 0.0
          kl: 0.0010251251515001059
          model: {}
          policy_loss: 0.000943657651077956
          total_loss: 6.6867995262146
          vf_explained_var: 0.19311632215976715
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,185,3207.59,740000,131.656,251.173,-23.3256,893.37


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,185,3207.59,740000,131.656,251.173,-23.3256,893.37


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,185,3207.59,740000,131.656,251.173,-23.3256,893.37


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 744000
  counters:
    num_agent_steps_sampled: 744000
    num_agent_steps_trained: 744000
    num_env_steps_sampled: 744000
    num_env_steps_trained: 744000
  custom_metrics: {}
  date: 2022-07-23_22-03-18
  done: false
  episode_len_mean: 892.63
  episode_media: {}
  episode_reward_max: 251.17267818858952
  episode_reward_mean: 131.4998149844917
  episode_reward_min: -24.51341717263314
  episodes_this_iter: 5
  episodes_total: 1691
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7485424876213074
          entropy_coeff: 0.0
          kl: 0.00018105455092154443
          model: {}
          policy_loss: -0.0018187157111242414
          total_loss: 5.998162269592285
          vf_explained_var: 0.10921238362789

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,186,3224.55,744000,131.5,251.173,-24.5134,892.63


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,186,3224.55,744000,131.5,251.173,-24.5134,892.63


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,186,3224.55,744000,131.5,251.173,-24.5134,892.63


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 748000
  counters:
    num_agent_steps_sampled: 748000
    num_agent_steps_trained: 748000
    num_env_steps_sampled: 748000
    num_env_steps_trained: 748000
  custom_metrics: {}
  date: 2022-07-23_22-03-34
  done: false
  episode_len_mean: 891.92
  episode_media: {}
  episode_reward_max: 251.17267818858952
  episode_reward_mean: 133.10980435771245
  episode_reward_min: -24.51341717263314
  episodes_this_iter: 4
  episodes_total: 1695
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7694560885429382
          entropy_coeff: 0.0
          kl: 0.02887323498725891
          model: {}
          policy_loss: -0.008486682549118996
          total_loss: 5.2478861808776855
          vf_explained_var: 0.1419872343540191

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,187,3241,748000,133.11,251.173,-24.5134,891.92


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,187,3241,748000,133.11,251.173,-24.5134,891.92


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,187,3241,748000,133.11,251.173,-24.5134,891.92


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 752000
  counters:
    num_agent_steps_sampled: 752000
    num_agent_steps_trained: 752000
    num_env_steps_sampled: 752000
    num_env_steps_trained: 752000
  custom_metrics: {}
  date: 2022-07-23_22-03-51
  done: false
  episode_len_mean: 885.07
  episode_media: {}
  episode_reward_max: 251.17267818858952
  episode_reward_mean: 133.68531019946025
  episode_reward_min: -24.51341717263314
  episodes_this_iter: 5
  episodes_total: 1700
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7125489115715027
          entropy_coeff: 0.0
          kl: 0.0007559814839623868
          model: {}
          policy_loss: -0.006849698256701231
          total_loss: 4.812457084655762
          vf_explained_var: 0.235094949603080

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,188,3257.84,752000,133.685,251.173,-24.5134,885.07


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,188,3257.84,752000,133.685,251.173,-24.5134,885.07


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,188,3257.84,752000,133.685,251.173,-24.5134,885.07


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 756000
  counters:
    num_agent_steps_sampled: 756000
    num_agent_steps_trained: 756000
    num_env_steps_sampled: 756000
    num_env_steps_trained: 756000
  custom_metrics: {}
  date: 2022-07-23_22-04-08
  done: false
  episode_len_mean: 878.0
  episode_media: {}
  episode_reward_max: 251.17267818858952
  episode_reward_mean: 129.45876010288737
  episode_reward_min: -24.51341717263314
  episodes_this_iter: 6
  episodes_total: 1706
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8045023083686829
          entropy_coeff: 0.0
          kl: 0.006283331662416458
          model: {}
          policy_loss: -0.004659353755414486
          total_loss: 5.15512752532959
          vf_explained_var: 0.18484404683113098


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,189,3274.44,756000,129.459,251.173,-24.5134,878


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,189,3274.44,756000,129.459,251.173,-24.5134,878


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,189,3274.44,756000,129.459,251.173,-24.5134,878


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 760000
  counters:
    num_agent_steps_sampled: 760000
    num_agent_steps_trained: 760000
    num_env_steps_sampled: 760000
    num_env_steps_trained: 760000
  custom_metrics: {}
  date: 2022-07-23_22-04-25
  done: false
  episode_len_mean: 878.0
  episode_media: {}
  episode_reward_max: 251.17267818858952
  episode_reward_mean: 129.2893737350897
  episode_reward_min: -24.51341717263314
  episodes_this_iter: 4
  episodes_total: 1710
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8294019103050232
          entropy_coeff: 0.0
          kl: 0.0005527151515707374
          model: {}
          policy_loss: -0.008927062153816223
          total_loss: 3.750420093536377
          vf_explained_var: 0.24831874668598175

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,190,3291.82,760000,129.289,251.173,-24.5134,878


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,190,3291.82,760000,129.289,251.173,-24.5134,878


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,190,3291.82,760000,129.289,251.173,-24.5134,878


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 764000
  counters:
    num_agent_steps_sampled: 764000
    num_agent_steps_trained: 764000
    num_env_steps_sampled: 764000
    num_env_steps_trained: 764000
  custom_metrics: {}
  date: 2022-07-23_22-04-44
  done: false
  episode_len_mean: 886.26
  episode_media: {}
  episode_reward_max: 251.17267818858952
  episode_reward_mean: 130.64358101870206
  episode_reward_min: -24.51341717263314
  episodes_this_iter: 5
  episodes_total: 1715
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7960518002510071
          entropy_coeff: 0.0
          kl: 0.002871451899409294
          model: {}
          policy_loss: -0.02437407709658146
          total_loss: 4.106283187866211
          vf_explained_var: 0.30226513743400574

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,191,3310.22,764000,130.644,251.173,-24.5134,886.26


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,191,3310.22,764000,130.644,251.173,-24.5134,886.26


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,191,3310.22,764000,130.644,251.173,-24.5134,886.26


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 768000
  counters:
    num_agent_steps_sampled: 768000
    num_agent_steps_trained: 768000
    num_env_steps_sampled: 768000
    num_env_steps_trained: 768000
  custom_metrics: {}
  date: 2022-07-23_22-05-01
  done: false
  episode_len_mean: 886.26
  episode_media: {}
  episode_reward_max: 251.17267818858952
  episode_reward_mean: 128.8071282668797
  episode_reward_min: -24.51341717263314
  episodes_this_iter: 4
  episodes_total: 1719
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8370830416679382
          entropy_coeff: 0.0
          kl: 0.0017238970613107085
          model: {}
          policy_loss: -0.005905066151171923
          total_loss: 3.9516212940216064
          vf_explained_var: 0.184827089309692

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,192,3327.91,768000,128.807,251.173,-24.5134,886.26


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,192,3327.91,768000,128.807,251.173,-24.5134,886.26


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,192,3327.91,768000,128.807,251.173,-24.5134,886.26


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 772000
  counters:
    num_agent_steps_sampled: 772000
    num_agent_steps_trained: 772000
    num_env_steps_sampled: 772000
    num_env_steps_trained: 772000
  custom_metrics: {}
  date: 2022-07-23_22-05-19
  done: false
  episode_len_mean: 896.72
  episode_media: {}
  episode_reward_max: 251.17267818858952
  episode_reward_mean: 129.60074258109972
  episode_reward_min: -24.51341717263314
  episodes_this_iter: 4
  episodes_total: 1723
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8097492456436157
          entropy_coeff: 0.0
          kl: 0.0030082473531365395
          model: {}
          policy_loss: -0.00891553983092308
          total_loss: 3.313917875289917
          vf_explained_var: 0.1368272453546524

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,193,3345.66,772000,129.601,251.173,-24.5134,896.72


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,193,3345.66,772000,129.601,251.173,-24.5134,896.72


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,193,3345.66,772000,129.601,251.173,-24.5134,896.72


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 776000
  counters:
    num_agent_steps_sampled: 776000
    num_agent_steps_trained: 776000
    num_env_steps_sampled: 776000
    num_env_steps_trained: 776000
  custom_metrics: {}
  date: 2022-07-23_22-05-37
  done: false
  episode_len_mean: 896.72
  episode_media: {}
  episode_reward_max: 251.17267818858952
  episode_reward_mean: 130.09431946983705
  episode_reward_min: -24.51341717263314
  episodes_this_iter: 4
  episodes_total: 1727
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7845441102981567
          entropy_coeff: 0.0
          kl: 0.015450842678546906
          model: {}
          policy_loss: -0.010034136474132538
          total_loss: 3.1356241703033447
          vf_explained_var: 0.142608746886253

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,194,3362.96,776000,130.094,251.173,-24.5134,896.72


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,194,3362.96,776000,130.094,251.173,-24.5134,896.72


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,194,3362.96,776000,130.094,251.173,-24.5134,896.72


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 780000
  counters:
    num_agent_steps_sampled: 780000
    num_agent_steps_trained: 780000
    num_env_steps_sampled: 780000
    num_env_steps_trained: 780000
  custom_metrics: {}
  date: 2022-07-23_22-05-53
  done: false
  episode_len_mean: 896.17
  episode_media: {}
  episode_reward_max: 251.17267818858952
  episode_reward_mean: 129.17740098501673
  episode_reward_min: -24.51341717263314
  episodes_this_iter: 5
  episodes_total: 1732
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7882334589958191
          entropy_coeff: 0.0
          kl: 0.0009283917024731636
          model: {}
          policy_loss: -0.005798073019832373
          total_loss: 4.4189133644104
          vf_explained_var: 0.18504264950752258

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,195,3379.69,780000,129.177,251.173,-24.5134,896.17


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,195,3379.69,780000,129.177,251.173,-24.5134,896.17


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,195,3379.69,780000,129.177,251.173,-24.5134,896.17


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 784000
  counters:
    num_agent_steps_sampled: 784000
    num_agent_steps_trained: 784000
    num_env_steps_sampled: 784000
    num_env_steps_trained: 784000
  custom_metrics: {}
  date: 2022-07-23_22-06-10
  done: false
  episode_len_mean: 902.96
  episode_media: {}
  episode_reward_max: 251.17267818858952
  episode_reward_mean: 127.91104846052633
  episode_reward_min: -24.51341717263314
  episodes_this_iter: 4
  episodes_total: 1736
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7734858393669128
          entropy_coeff: 0.0
          kl: 0.0010299492860212922
          model: {}
          policy_loss: -0.0060280924662947655
          total_loss: 3.562495470046997
          vf_explained_var: 0.15338003635406

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,196,3396.59,784000,127.911,251.173,-24.5134,902.96


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,196,3396.59,784000,127.911,251.173,-24.5134,902.96


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,196,3396.59,784000,127.911,251.173,-24.5134,902.96


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 788000
  counters:
    num_agent_steps_sampled: 788000
    num_agent_steps_trained: 788000
    num_env_steps_sampled: 788000
    num_env_steps_trained: 788000
  custom_metrics: {}
  date: 2022-07-23_22-06-28
  done: false
  episode_len_mean: 902.96
  episode_media: {}
  episode_reward_max: 251.17267818858952
  episode_reward_mean: 128.18764534368367
  episode_reward_min: -24.51341717263314
  episodes_this_iter: 4
  episodes_total: 1740
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7360521554946899
          entropy_coeff: 0.0
          kl: 0.0028813339304178953
          model: {}
          policy_loss: -0.009540420025587082
          total_loss: 4.475174903869629
          vf_explained_var: 0.198067560791969

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,197,3413.65,788000,128.188,251.173,-24.5134,902.96


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,197,3413.65,788000,128.188,251.173,-24.5134,902.96


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,197,3413.65,788000,128.188,251.173,-24.5134,902.96


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 792000
  counters:
    num_agent_steps_sampled: 792000
    num_agent_steps_trained: 792000
    num_env_steps_sampled: 792000
    num_env_steps_trained: 792000
  custom_metrics: {}
  date: 2022-07-23_22-06-45
  done: false
  episode_len_mean: 902.96
  episode_media: {}
  episode_reward_max: 251.17267818858952
  episode_reward_mean: 128.82151444598202
  episode_reward_min: -24.51341717263314
  episodes_this_iter: 4
  episodes_total: 1744
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7419378757476807
          entropy_coeff: 0.0
          kl: 0.0014604758471250534
          model: {}
          policy_loss: -0.003631403436884284
          total_loss: 4.756710052490234
          vf_explained_var: 0.195027381181716

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,198,3431.24,792000,128.822,251.173,-24.5134,902.96


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,198,3431.24,792000,128.822,251.173,-24.5134,902.96


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,198,3431.24,792000,128.822,251.173,-24.5134,902.96


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 796000
  counters:
    num_agent_steps_sampled: 796000
    num_agent_steps_trained: 796000
    num_env_steps_sampled: 796000
    num_env_steps_trained: 796000
  custom_metrics: {}
  date: 2022-07-23_22-07-02
  done: false
  episode_len_mean: 894.94
  episode_media: {}
  episode_reward_max: 251.17267818858952
  episode_reward_mean: 127.77498485433703
  episode_reward_min: -24.51341717263314
  episodes_this_iter: 4
  episodes_total: 1748
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7802670001983643
          entropy_coeff: 0.0
          kl: 0.012803541496396065
          model: {}
          policy_loss: -0.005378200206905603
          total_loss: 4.68533992767334
          vf_explained_var: 0.1726556271314621


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,199,3448.15,796000,127.775,251.173,-24.5134,894.94


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,199,3448.15,796000,127.775,251.173,-24.5134,894.94


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,199,3448.15,796000,127.775,251.173,-24.5134,894.94


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 800000
  counters:
    num_agent_steps_sampled: 800000
    num_agent_steps_trained: 800000
    num_env_steps_sampled: 800000
    num_env_steps_trained: 800000
  custom_metrics: {}
  date: 2022-07-23_22-07-19
  done: false
  episode_len_mean: 902.84
  episode_media: {}
  episode_reward_max: 251.17267818858952
  episode_reward_mean: 128.74757284406186
  episode_reward_min: -24.51341717263314
  episodes_this_iter: 4
  episodes_total: 1752
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7525433897972107
          entropy_coeff: 0.0
          kl: 0.0017184549942612648
          model: {}
          policy_loss: -0.004826568067073822
          total_loss: 3.2190492153167725
          vf_explained_var: 0.17439256608486

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,200,3465.44,800000,128.748,251.173,-24.5134,902.84


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,200,3465.44,800000,128.748,251.173,-24.5134,902.84


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,200,3465.44,800000,128.748,251.173,-24.5134,902.84


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 804000
  counters:
    num_agent_steps_sampled: 804000
    num_agent_steps_trained: 804000
    num_env_steps_sampled: 804000
    num_env_steps_trained: 804000
  custom_metrics: {}
  date: 2022-07-23_22-07-37
  done: false
  episode_len_mean: 902.84
  episode_media: {}
  episode_reward_max: 251.17267818858952
  episode_reward_mean: 130.1731363323393
  episode_reward_min: -24.51341717263314
  episodes_this_iter: 4
  episodes_total: 1756
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7693940997123718
          entropy_coeff: 0.0
          kl: 0.010089491493999958
          model: {}
          policy_loss: -0.006026609800755978
          total_loss: 4.4159698486328125
          vf_explained_var: 0.2394193857908249

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,201,3482.74,804000,130.173,251.173,-24.5134,902.84


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,201,3482.74,804000,130.173,251.173,-24.5134,902.84


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,201,3482.74,804000,130.173,251.173,-24.5134,902.84


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 808000
  counters:
    num_agent_steps_sampled: 808000
    num_agent_steps_trained: 808000
    num_env_steps_sampled: 808000
    num_env_steps_trained: 808000
  custom_metrics: {}
  date: 2022-07-23_22-07-54
  done: false
  episode_len_mean: 894.5
  episode_media: {}
  episode_reward_max: 251.17267818858952
  episode_reward_mean: 128.47757731198314
  episode_reward_min: -24.51341717263314
  episodes_this_iter: 5
  episodes_total: 1761
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7513739466667175
          entropy_coeff: 0.0
          kl: 0.0018568179802969098
          model: {}
          policy_loss: -0.006840122863650322
          total_loss: 3.5059752464294434
          vf_explained_var: 0.126554831862449

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,202,3500,808000,128.478,251.173,-24.5134,894.5


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,202,3500,808000,128.478,251.173,-24.5134,894.5


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,202,3500,808000,128.478,251.173,-24.5134,894.5


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 812000
  counters:
    num_agent_steps_sampled: 812000
    num_agent_steps_trained: 812000
    num_env_steps_sampled: 812000
    num_env_steps_trained: 812000
  custom_metrics: {}
  date: 2022-07-23_22-08-11
  done: false
  episode_len_mean: 882.57
  episode_media: {}
  episode_reward_max: 251.17267818858952
  episode_reward_mean: 128.98722640491815
  episode_reward_min: -24.51341717263314
  episodes_this_iter: 5
  episodes_total: 1766
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.6908590197563171
          entropy_coeff: 0.0
          kl: 0.008193247020244598
          model: {}
          policy_loss: -0.004537689033895731
          total_loss: 3.8699493408203125
          vf_explained_var: 0.108025878667831

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,203,3516.41,812000,128.987,251.173,-24.5134,882.57


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,203,3516.41,812000,128.987,251.173,-24.5134,882.57


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,203,3516.41,812000,128.987,251.173,-24.5134,882.57


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 816000
  counters:
    num_agent_steps_sampled: 816000
    num_agent_steps_trained: 816000
    num_env_steps_sampled: 816000
    num_env_steps_trained: 816000
  custom_metrics: {}
  date: 2022-07-23_22-08-27
  done: false
  episode_len_mean: 875.32
  episode_media: {}
  episode_reward_max: 267.54412338811903
  episode_reward_mean: 129.86706854962563
  episode_reward_min: -24.51341717263314
  episodes_this_iter: 6
  episodes_total: 1772
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7149192094802856
          entropy_coeff: 0.0
          kl: 0.01376680750399828
          model: {}
          policy_loss: -0.009253140538930893
          total_loss: 4.5613274574279785
          vf_explained_var: 0.1337666958570480

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,204,3532.66,816000,129.867,267.544,-24.5134,875.32


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,204,3532.66,816000,129.867,267.544,-24.5134,875.32


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,204,3532.66,816000,129.867,267.544,-24.5134,875.32


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 820000
  counters:
    num_agent_steps_sampled: 820000
    num_agent_steps_trained: 820000
    num_env_steps_sampled: 820000
    num_env_steps_trained: 820000
  custom_metrics: {}
  date: 2022-07-23_22-08-44
  done: false
  episode_len_mean: 890.7
  episode_media: {}
  episode_reward_max: 267.54412338811903
  episode_reward_mean: 133.4007987965364
  episode_reward_min: -24.51341717263314
  episodes_this_iter: 4
  episodes_total: 1776
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7237804532051086
          entropy_coeff: 0.0
          kl: 0.0013863072963431478
          model: {}
          policy_loss: -0.005772529169917107
          total_loss: 4.902543067932129
          vf_explained_var: 0.12179335951805115

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,205,3549.31,820000,133.401,267.544,-24.5134,890.7


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,205,3549.31,820000,133.401,267.544,-24.5134,890.7


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,205,3549.31,820000,133.401,267.544,-24.5134,890.7


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 824000
  counters:
    num_agent_steps_sampled: 824000
    num_agent_steps_trained: 824000
    num_env_steps_sampled: 824000
    num_env_steps_trained: 824000
  custom_metrics: {}
  date: 2022-07-23_22-09-01
  done: false
  episode_len_mean: 897.66
  episode_media: {}
  episode_reward_max: 267.54412338811903
  episode_reward_mean: 132.92428840625135
  episode_reward_min: -24.51341717263314
  episodes_this_iter: 4
  episodes_total: 1780
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.741926372051239
          entropy_coeff: 0.0
          kl: 0.0018538510194048285
          model: {}
          policy_loss: -0.004043695516884327
          total_loss: 4.5528106689453125
          vf_explained_var: 0.148915380239486

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,206,3566.61,824000,132.924,267.544,-24.5134,897.66


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,206,3566.61,824000,132.924,267.544,-24.5134,897.66


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,206,3566.61,824000,132.924,267.544,-24.5134,897.66


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 828000
  counters:
    num_agent_steps_sampled: 828000
    num_agent_steps_trained: 828000
    num_env_steps_sampled: 828000
    num_env_steps_trained: 828000
  custom_metrics: {}
  date: 2022-07-23_22-09-18
  done: false
  episode_len_mean: 889.83
  episode_media: {}
  episode_reward_max: 267.54412338811903
  episode_reward_mean: 132.60660487705752
  episode_reward_min: -24.51341717263314
  episodes_this_iter: 5
  episodes_total: 1785
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.740083634853363
          entropy_coeff: 0.0
          kl: 0.0010714101372286677
          model: {}
          policy_loss: -0.0063860174268484116
          total_loss: 4.51100492477417
          vf_explained_var: 0.1769330054521560

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,207,3583.68,828000,132.607,267.544,-24.5134,889.83


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,207,3583.68,828000,132.607,267.544,-24.5134,889.83


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,207,3583.68,828000,132.607,267.544,-24.5134,889.83


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 832000
  counters:
    num_agent_steps_sampled: 832000
    num_agent_steps_trained: 832000
    num_env_steps_sampled: 832000
    num_env_steps_trained: 832000
  custom_metrics: {}
  date: 2022-07-23_22-09-35
  done: false
  episode_len_mean: 897.56
  episode_media: {}
  episode_reward_max: 267.54412338811903
  episode_reward_mean: 134.97255628016228
  episode_reward_min: -23.936386914690374
  episodes_this_iter: 4
  episodes_total: 1789
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7559517025947571
          entropy_coeff: 0.0
          kl: 0.003759190673008561
          model: {}
          policy_loss: -0.004864622838795185
          total_loss: 3.5408692359924316
          vf_explained_var: 0.13463610410690

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,208,3600.73,832000,134.973,267.544,-23.9364,897.56


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,208,3600.73,832000,134.973,267.544,-23.9364,897.56


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,208,3600.73,832000,134.973,267.544,-23.9364,897.56


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 836000
  counters:
    num_agent_steps_sampled: 836000
    num_agent_steps_trained: 836000
    num_env_steps_sampled: 836000
    num_env_steps_trained: 836000
  custom_metrics: {}
  date: 2022-07-23_22-09-52
  done: false
  episode_len_mean: 905.62
  episode_media: {}
  episode_reward_max: 267.54412338811903
  episode_reward_mean: 136.665308743524
  episode_reward_min: -23.936386914690374
  episodes_this_iter: 4
  episodes_total: 1793
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7863064408302307
          entropy_coeff: 0.0
          kl: 0.002121371915563941
          model: {}
          policy_loss: -0.00517640495672822
          total_loss: 3.542964458465576
          vf_explained_var: 0.19651560485363007


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,209,3617.42,836000,136.665,267.544,-23.9364,905.62


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,209,3617.42,836000,136.665,267.544,-23.9364,905.62


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,209,3617.42,836000,136.665,267.544,-23.9364,905.62


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 840000
  counters:
    num_agent_steps_sampled: 840000
    num_agent_steps_trained: 840000
    num_env_steps_sampled: 840000
    num_env_steps_trained: 840000
  custom_metrics: {}
  date: 2022-07-23_22-10-08
  done: false
  episode_len_mean: 889.58
  episode_media: {}
  episode_reward_max: 267.54412338811903
  episode_reward_mean: 134.8738876865818
  episode_reward_min: -23.936386914690374
  episodes_this_iter: 7
  episodes_total: 1800
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7601339221000671
          entropy_coeff: 0.0
          kl: 0.004844149108976126
          model: {}
          policy_loss: -0.005673969630151987
          total_loss: 4.8809003829956055
          vf_explained_var: 0.212438434362411

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,210,3633.38,840000,134.874,267.544,-23.9364,889.58


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,210,3633.38,840000,134.874,267.544,-23.9364,889.58


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,210,3633.38,840000,134.874,267.544,-23.9364,889.58


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 844000
  counters:
    num_agent_steps_sampled: 844000
    num_agent_steps_trained: 844000
    num_env_steps_sampled: 844000
    num_env_steps_trained: 844000
  custom_metrics: {}
  date: 2022-07-23_22-10-24
  done: false
  episode_len_mean: 897.54
  episode_media: {}
  episode_reward_max: 267.54412338811903
  episode_reward_mean: 136.2432623152365
  episode_reward_min: -23.936386914690374
  episodes_this_iter: 4
  episodes_total: 1804
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7691769599914551
          entropy_coeff: 0.0
          kl: 0.012172774411737919
          model: {}
          policy_loss: -0.00891165342181921
          total_loss: 3.5746750831604004
          vf_explained_var: 0.1280888319015503

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,211,3649.71,844000,136.243,267.544,-23.9364,897.54


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,211,3649.71,844000,136.243,267.544,-23.9364,897.54


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,211,3649.71,844000,136.243,267.544,-23.9364,897.54


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 848000
  counters:
    num_agent_steps_sampled: 848000
    num_agent_steps_trained: 848000
    num_env_steps_sampled: 848000
    num_env_steps_trained: 848000
  custom_metrics: {}
  date: 2022-07-23_22-10-41
  done: false
  episode_len_mean: 905.1
  episode_media: {}
  episode_reward_max: 267.54412338811903
  episode_reward_mean: 138.2391042264828
  episode_reward_min: -23.936386914690374
  episodes_this_iter: 4
  episodes_total: 1808
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7530409693717957
          entropy_coeff: 0.0
          kl: 0.006442072801291943
          model: {}
          policy_loss: -0.008469329215586185
          total_loss: 3.189253330230713
          vf_explained_var: 0.11654112488031387

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,212,3666.54,848000,138.239,267.544,-23.9364,905.1


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,212,3666.54,848000,138.239,267.544,-23.9364,905.1


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,212,3666.54,848000,138.239,267.544,-23.9364,905.1


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 852000
  counters:
    num_agent_steps_sampled: 852000
    num_agent_steps_trained: 852000
    num_env_steps_sampled: 852000
    num_env_steps_trained: 852000
  custom_metrics: {}
  date: 2022-07-23_22-10-59
  done: false
  episode_len_mean: 905.1
  episode_media: {}
  episode_reward_max: 267.54412338811903
  episode_reward_mean: 139.05604457623508
  episode_reward_min: -23.936386914690374
  episodes_this_iter: 4
  episodes_total: 1812
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7711056470870972
          entropy_coeff: 0.0
          kl: 0.0027975691482424736
          model: {}
          policy_loss: -0.00519789382815361
          total_loss: 3.8117620944976807
          vf_explained_var: 0.181188151240348

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,213,3683.79,852000,139.056,267.544,-23.9364,905.1


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,213,3683.79,852000,139.056,267.544,-23.9364,905.1


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,213,3683.79,852000,139.056,267.544,-23.9364,905.1


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 856000
  counters:
    num_agent_steps_sampled: 856000
    num_agent_steps_trained: 856000
    num_env_steps_sampled: 856000
    num_env_steps_trained: 856000
  custom_metrics: {}
  date: 2022-07-23_22-11-15
  done: false
  episode_len_mean: 912.87
  episode_media: {}
  episode_reward_max: 267.54412338811903
  episode_reward_mean: 140.85799543829032
  episode_reward_min: -23.936386914690374
  episodes_this_iter: 4
  episodes_total: 1816
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7880969047546387
          entropy_coeff: 0.0
          kl: 0.009063034318387508
          model: {}
          policy_loss: -0.006643959786742926
          total_loss: 3.757230758666992
          vf_explained_var: 0.132594630122184

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,214,3700.64,856000,140.858,267.544,-23.9364,912.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,214,3700.64,856000,140.858,267.544,-23.9364,912.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,214,3700.64,856000,140.858,267.544,-23.9364,912.87


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 860000
  counters:
    num_agent_steps_sampled: 860000
    num_agent_steps_trained: 860000
    num_env_steps_sampled: 860000
    num_env_steps_trained: 860000
  custom_metrics: {}
  date: 2022-07-23_22-11-32
  done: false
  episode_len_mean: 912.87
  episode_media: {}
  episode_reward_max: 267.54412338811903
  episode_reward_mean: 141.7570232156859
  episode_reward_min: -23.936386914690374
  episodes_this_iter: 4
  episodes_total: 1820
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7795498371124268
          entropy_coeff: 0.0
          kl: 0.009808402508497238
          model: {}
          policy_loss: -0.005942151881754398
          total_loss: 3.077205181121826
          vf_explained_var: 0.1549436002969741

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,215,3717.33,860000,141.757,267.544,-23.9364,912.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,215,3717.33,860000,141.757,267.544,-23.9364,912.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,215,3717.33,860000,141.757,267.544,-23.9364,912.87


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 864000
  counters:
    num_agent_steps_sampled: 864000
    num_agent_steps_trained: 864000
    num_env_steps_sampled: 864000
    num_env_steps_trained: 864000
  custom_metrics: {}
  date: 2022-07-23_22-11-49
  done: false
  episode_len_mean: 912.87
  episode_media: {}
  episode_reward_max: 267.54412338811903
  episode_reward_mean: 140.86763639090125
  episode_reward_min: -23.936386914690374
  episodes_this_iter: 4
  episodes_total: 1824
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7534162998199463
          entropy_coeff: 0.0
          kl: 0.006018121261149645
          model: {}
          policy_loss: -0.005549153778702021
          total_loss: 3.6125314235687256
          vf_explained_var: 0.17002084851264

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,216,3733.68,864000,140.868,267.544,-23.9364,912.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,216,3733.68,864000,140.868,267.544,-23.9364,912.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,216,3733.68,864000,140.868,267.544,-23.9364,912.87


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 868000
  counters:
    num_agent_steps_sampled: 868000
    num_agent_steps_trained: 868000
    num_env_steps_sampled: 868000
    num_env_steps_trained: 868000
  custom_metrics: {}
  date: 2022-07-23_22-12-05
  done: false
  episode_len_mean: 884.91
  episode_media: {}
  episode_reward_max: 267.54412338811903
  episode_reward_mean: 136.37378366308565
  episode_reward_min: -23.936386914690374
  episodes_this_iter: 7
  episodes_total: 1831
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7541705965995789
          entropy_coeff: 0.0
          kl: 0.0032577859237790108
          model: {}
          policy_loss: -0.0037838618736714125
          total_loss: 6.322413921356201
          vf_explained_var: 0.1925337314605

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,217,3750.29,868000,136.374,267.544,-23.9364,884.91


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,217,3750.29,868000,136.374,267.544,-23.9364,884.91


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,217,3750.29,868000,136.374,267.544,-23.9364,884.91


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 872000
  counters:
    num_agent_steps_sampled: 872000
    num_agent_steps_trained: 872000
    num_env_steps_sampled: 872000
    num_env_steps_trained: 872000
  custom_metrics: {}
  date: 2022-07-23_22-12-22
  done: false
  episode_len_mean: 884.91
  episode_media: {}
  episode_reward_max: 267.54412338811903
  episode_reward_mean: 136.3450403392553
  episode_reward_min: -23.936386914690374
  episodes_this_iter: 4
  episodes_total: 1835
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7938210964202881
          entropy_coeff: 0.0
          kl: 0.0056207068264484406
          model: {}
          policy_loss: -0.007123191840946674
          total_loss: 4.800969123840332
          vf_explained_var: 0.252195954322814

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,218,3767.15,872000,136.345,267.544,-23.9364,884.91


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,218,3767.15,872000,136.345,267.544,-23.9364,884.91


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,218,3767.15,872000,136.345,267.544,-23.9364,884.91


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 876000
  counters:
    num_agent_steps_sampled: 876000
    num_agent_steps_trained: 876000
    num_env_steps_sampled: 876000
    num_env_steps_trained: 876000
  custom_metrics: {}
  date: 2022-07-23_22-12-39
  done: false
  episode_len_mean: 884.91
  episode_media: {}
  episode_reward_max: 267.54412338811903
  episode_reward_mean: 136.15617317520136
  episode_reward_min: -23.936386914690374
  episodes_this_iter: 4
  episodes_total: 1839
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7928760647773743
          entropy_coeff: 0.0
          kl: 0.01890038698911667
          model: {}
          policy_loss: -0.011319402605295181
          total_loss: 3.677990436553955
          vf_explained_var: 0.2662360072135925

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,219,3784.22,876000,136.156,267.544,-23.9364,884.91


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,219,3784.22,876000,136.156,267.544,-23.9364,884.91


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,219,3784.22,876000,136.156,267.544,-23.9364,884.91


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 880000
  counters:
    num_agent_steps_sampled: 880000
    num_agent_steps_trained: 880000
    num_env_steps_sampled: 880000
    num_env_steps_trained: 880000
  custom_metrics: {}
  date: 2022-07-23_22-12-57
  done: false
  episode_len_mean: 884.91
  episode_media: {}
  episode_reward_max: 267.54412338811903
  episode_reward_mean: 135.7945360678951
  episode_reward_min: -23.936386914690374
  episodes_this_iter: 4
  episodes_total: 1843
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8133869171142578
          entropy_coeff: 0.0
          kl: 0.003979589324444532
          model: {}
          policy_loss: -0.005231833551079035
          total_loss: 5.367300033569336
          vf_explained_var: 0.2948097586631775

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,220,3801.59,880000,135.795,267.544,-23.9364,884.91


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,220,3801.59,880000,135.795,267.544,-23.9364,884.91


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,220,3801.59,880000,135.795,267.544,-23.9364,884.91


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 884000
  counters:
    num_agent_steps_sampled: 884000
    num_agent_steps_trained: 884000
    num_env_steps_sampled: 884000
    num_env_steps_trained: 884000
  custom_metrics: {}
  date: 2022-07-23_22-13-14
  done: false
  episode_len_mean: 892.93
  episode_media: {}
  episode_reward_max: 267.54412338811903
  episode_reward_mean: 135.53183551881463
  episode_reward_min: -23.936386914690374
  episodes_this_iter: 4
  episodes_total: 1847
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7948029637336731
          entropy_coeff: 0.0
          kl: 0.009948179125785828
          model: {}
          policy_loss: -0.008012569509446621
          total_loss: 4.1819257736206055
          vf_explained_var: 0.21425957977771

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,221,3819.03,884000,135.532,267.544,-23.9364,892.93


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,221,3819.03,884000,135.532,267.544,-23.9364,892.93


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,221,3819.03,884000,135.532,267.544,-23.9364,892.93


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 888000
  counters:
    num_agent_steps_sampled: 888000
    num_agent_steps_trained: 888000
    num_env_steps_sampled: 888000
    num_env_steps_trained: 888000
  custom_metrics: {}
  date: 2022-07-23_22-13-32
  done: false
  episode_len_mean: 892.93
  episode_media: {}
  episode_reward_max: 267.54412338811903
  episode_reward_mean: 135.2067320452161
  episode_reward_min: -23.936386914690374
  episodes_this_iter: 4
  episodes_total: 1851
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8510425090789795
          entropy_coeff: 0.0
          kl: 0.0019904067739844322
          model: {}
          policy_loss: -0.006022542715072632
          total_loss: 4.301374912261963
          vf_explained_var: 0.270063698291778

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,222,3836.72,888000,135.207,267.544,-23.9364,892.93


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,222,3836.72,888000,135.207,267.544,-23.9364,892.93


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,222,3836.72,888000,135.207,267.544,-23.9364,892.93


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 892000
  counters:
    num_agent_steps_sampled: 892000
    num_agent_steps_trained: 892000
    num_env_steps_sampled: 892000
    num_env_steps_trained: 892000
  custom_metrics: {}
  date: 2022-07-23_22-13-48
  done: false
  episode_len_mean: 892.93
  episode_media: {}
  episode_reward_max: 267.54412338811903
  episode_reward_mean: 134.06373741106387
  episode_reward_min: -23.936386914690374
  episodes_this_iter: 4
  episodes_total: 1855
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8014958500862122
          entropy_coeff: 0.0
          kl: 0.006759721785783768
          model: {}
          policy_loss: -0.006348000839352608
          total_loss: 4.395911693572998
          vf_explained_var: 0.316100299358367

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,223,3853.07,892000,134.064,267.544,-23.9364,892.93


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,223,3853.07,892000,134.064,267.544,-23.9364,892.93


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,223,3853.07,892000,134.064,267.544,-23.9364,892.93


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 896000
  counters:
    num_agent_steps_sampled: 896000
    num_agent_steps_trained: 896000
    num_env_steps_sampled: 896000
    num_env_steps_trained: 896000
  custom_metrics: {}
  date: 2022-07-23_22-14-07
  done: false
  episode_len_mean: 892.93
  episode_media: {}
  episode_reward_max: 267.54412338811903
  episode_reward_mean: 132.91667610980673
  episode_reward_min: -23.936386914690374
  episodes_this_iter: 4
  episodes_total: 1859
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8121545910835266
          entropy_coeff: 0.0
          kl: 0.00410445686429739
          model: {}
          policy_loss: -0.005741168279200792
          total_loss: 4.073586940765381
          vf_explained_var: 0.2204774767160415

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,224,3871.4,896000,132.917,267.544,-23.9364,892.93


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,224,3871.4,896000,132.917,267.544,-23.9364,892.93


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,224,3871.4,896000,132.917,267.544,-23.9364,892.93


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 900000
  counters:
    num_agent_steps_sampled: 900000
    num_agent_steps_trained: 900000
    num_env_steps_sampled: 900000
    num_env_steps_trained: 900000
  custom_metrics: {}
  date: 2022-07-23_22-14-24
  done: false
  episode_len_mean: 911.21
  episode_media: {}
  episode_reward_max: 267.54412338811903
  episode_reward_mean: 134.53466274189813
  episode_reward_min: -3.7665588452728116
  episodes_this_iter: 4
  episodes_total: 1863
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8082665801048279
          entropy_coeff: 0.0
          kl: 0.015401199460029602
          model: {}
          policy_loss: -0.010545392520725727
          total_loss: 3.8599109649658203
          vf_explained_var: 0.29397058486938

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,225,3888.97,900000,134.535,267.544,-3.76656,911.21


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,225,3888.97,900000,134.535,267.544,-3.76656,911.21


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,225,3888.97,900000,134.535,267.544,-3.76656,911.21


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 904000
  counters:
    num_agent_steps_sampled: 904000
    num_agent_steps_trained: 904000
    num_env_steps_sampled: 904000
    num_env_steps_trained: 904000
  custom_metrics: {}
  date: 2022-07-23_22-14-42
  done: false
  episode_len_mean: 914.65
  episode_media: {}
  episode_reward_max: 248.66575114285706
  episode_reward_mean: 131.80069933315775
  episode_reward_min: -3.7665588452728116
  episodes_this_iter: 5
  episodes_total: 1868
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7867379784584045
          entropy_coeff: 0.0
          kl: 0.004080814775079489
          model: {}
          policy_loss: -0.004717092029750347
          total_loss: 4.476983547210693
          vf_explained_var: 0.235293298959732

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,226,3906.28,904000,131.801,248.666,-3.76656,914.65


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,226,3906.28,904000,131.801,248.666,-3.76656,914.65


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,226,3906.28,904000,131.801,248.666,-3.76656,914.65


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 908000
  counters:
    num_agent_steps_sampled: 908000
    num_agent_steps_trained: 908000
    num_env_steps_sampled: 908000
    num_env_steps_trained: 908000
  custom_metrics: {}
  date: 2022-07-23_22-14-59
  done: false
  episode_len_mean: 921.63
  episode_media: {}
  episode_reward_max: 237.19746519562798
  episode_reward_mean: 129.34359043569862
  episode_reward_min: -3.7665588452728116
  episodes_this_iter: 4
  episodes_total: 1872
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8111799955368042
          entropy_coeff: 0.0
          kl: 0.014297062531113625
          model: {}
          policy_loss: -0.008754984475672245
          total_loss: 3.745316743850708
          vf_explained_var: 0.158239930868148

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,227,3923.61,908000,129.344,237.197,-3.76656,921.63


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,227,3923.61,908000,129.344,237.197,-3.76656,921.63


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,227,3923.61,908000,129.344,237.197,-3.76656,921.63


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 912000
  counters:
    num_agent_steps_sampled: 912000
    num_agent_steps_trained: 912000
    num_env_steps_sampled: 912000
    num_env_steps_trained: 912000
  custom_metrics: {}
  date: 2022-07-23_22-15-16
  done: false
  episode_len_mean: 921.63
  episode_media: {}
  episode_reward_max: 237.19746519562798
  episode_reward_mean: 127.65451312530502
  episode_reward_min: -3.7665588452728116
  episodes_this_iter: 4
  episodes_total: 1876
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8210397958755493
          entropy_coeff: 0.0
          kl: 0.0038581881672143936
          model: {}
          policy_loss: 4.049936978844926e-05
          total_loss: 4.259799480438232
          vf_explained_var: 0.19394327700138

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,228,3940.11,912000,127.655,237.197,-3.76656,921.63


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,228,3940.11,912000,127.655,237.197,-3.76656,921.63


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,228,3940.11,912000,127.655,237.197,-3.76656,921.63


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 916000
  counters:
    num_agent_steps_sampled: 916000
    num_agent_steps_trained: 916000
    num_env_steps_sampled: 916000
    num_env_steps_trained: 916000
  custom_metrics: {}
  date: 2022-07-23_22-15-33
  done: false
  episode_len_mean: 921.63
  episode_media: {}
  episode_reward_max: 237.19746519562798
  episode_reward_mean: 127.87469379192036
  episode_reward_min: -3.7665588452728116
  episodes_this_iter: 4
  episodes_total: 1880
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8131909966468811
          entropy_coeff: 0.0
          kl: 0.0055601634085178375
          model: {}
          policy_loss: -0.011898153461515903
          total_loss: 5.1343770027160645
          vf_explained_var: 0.1603628545999

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,229,3957.45,916000,127.875,237.197,-3.76656,921.63


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,229,3957.45,916000,127.875,237.197,-3.76656,921.63


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,229,3957.45,916000,127.875,237.197,-3.76656,921.63


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 920000
  counters:
    num_agent_steps_sampled: 920000
    num_agent_steps_trained: 920000
    num_env_steps_sampled: 920000
    num_env_steps_trained: 920000
  custom_metrics: {}
  date: 2022-07-23_22-15-50
  done: false
  episode_len_mean: 929.46
  episode_media: {}
  episode_reward_max: 237.19746519562798
  episode_reward_mean: 127.39637263801697
  episode_reward_min: -3.7665588452728116
  episodes_this_iter: 4
  episodes_total: 1884
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8544148802757263
          entropy_coeff: 0.0
          kl: 0.006648001726716757
          model: {}
          policy_loss: -0.008481421507894993
          total_loss: 4.509271621704102
          vf_explained_var: 0.175261661410331

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,230,3973.93,920000,127.396,237.197,-3.76656,929.46


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,230,3973.93,920000,127.396,237.197,-3.76656,929.46


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,230,3973.93,920000,127.396,237.197,-3.76656,929.46


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 924000
  counters:
    num_agent_steps_sampled: 924000
    num_agent_steps_trained: 924000
    num_env_steps_sampled: 924000
    num_env_steps_trained: 924000
  custom_metrics: {}
  date: 2022-07-23_22-16-07
  done: false
  episode_len_mean: 929.46
  episode_media: {}
  episode_reward_max: 237.19746519562798
  episode_reward_mean: 126.36148992487138
  episode_reward_min: -3.7665588452728116
  episodes_this_iter: 4
  episodes_total: 1888
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8543726205825806
          entropy_coeff: 0.0
          kl: 0.009777069091796875
          model: {}
          policy_loss: -0.00834816787391901
          total_loss: 4.521230220794678
          vf_explained_var: 0.2505639791488647

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,231,3991.61,924000,126.361,237.197,-3.76656,929.46


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,231,3991.61,924000,126.361,237.197,-3.76656,929.46


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,231,3991.61,924000,126.361,237.197,-3.76656,929.46


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 928000
  counters:
    num_agent_steps_sampled: 928000
    num_agent_steps_trained: 928000
    num_env_steps_sampled: 928000
    num_env_steps_trained: 928000
  custom_metrics: {}
  date: 2022-07-23_22-16-25
  done: false
  episode_len_mean: 937.17
  episode_media: {}
  episode_reward_max: 237.19746519562798
  episode_reward_mean: 126.48743356322679
  episode_reward_min: -3.7665588452728116
  episodes_this_iter: 4
  episodes_total: 1892
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8100153803825378
          entropy_coeff: 0.0
          kl: 0.011262905783951283
          model: {}
          policy_loss: -0.006458040326833725
          total_loss: 4.335499286651611
          vf_explained_var: 0.210386395454406

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,232,4009.46,928000,126.487,237.197,-3.76656,937.17


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,232,4009.46,928000,126.487,237.197,-3.76656,937.17


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,232,4009.46,928000,126.487,237.197,-3.76656,937.17


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 932000
  counters:
    num_agent_steps_sampled: 932000
    num_agent_steps_trained: 932000
    num_env_steps_sampled: 932000
    num_env_steps_trained: 932000
  custom_metrics: {}
  date: 2022-07-23_22-16-43
  done: false
  episode_len_mean: 944.55
  episode_media: {}
  episode_reward_max: 234.07790073110783
  episode_reward_mean: 124.41198980709454
  episode_reward_min: -3.7665588452728116
  episodes_this_iter: 4
  episodes_total: 1896
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8202640414237976
          entropy_coeff: 0.0
          kl: 0.0005569703062064946
          model: {}
          policy_loss: -0.0032812433782964945
          total_loss: 5.022218227386475
          vf_explained_var: 0.1239154636859

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,233,4026.67,932000,124.412,234.078,-3.76656,944.55


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,233,4026.67,932000,124.412,234.078,-3.76656,944.55


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,233,4026.67,932000,124.412,234.078,-3.76656,944.55


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 936000
  counters:
    num_agent_steps_sampled: 936000
    num_agent_steps_trained: 936000
    num_env_steps_sampled: 936000
    num_env_steps_trained: 936000
  custom_metrics: {}
  date: 2022-07-23_22-17-01
  done: false
  episode_len_mean: 960.06
  episode_media: {}
  episode_reward_max: 234.07790073110783
  episode_reward_mean: 126.70801305660807
  episode_reward_min: -3.7665588452728116
  episodes_this_iter: 4
  episodes_total: 1900
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8165621757507324
          entropy_coeff: 0.0
          kl: 0.005069666542112827
          model: {}
          policy_loss: -0.010533168911933899
          total_loss: 4.477601051330566
          vf_explained_var: 0.102042853832244

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,234,4044.62,936000,126.708,234.078,-3.76656,960.06


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,234,4044.62,936000,126.708,234.078,-3.76656,960.06


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,234,4044.62,936000,126.708,234.078,-3.76656,960.06


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 940000
  counters:
    num_agent_steps_sampled: 940000
    num_agent_steps_trained: 940000
    num_env_steps_sampled: 940000
    num_env_steps_trained: 940000
  custom_metrics: {}
  date: 2022-07-23_22-17-19
  done: false
  episode_len_mean: 960.06
  episode_media: {}
  episode_reward_max: 234.07790073110783
  episode_reward_mean: 126.70752124861389
  episode_reward_min: -3.7665588452728116
  episodes_this_iter: 4
  episodes_total: 1904
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7884413599967957
          entropy_coeff: 0.0
          kl: 0.01171764638274908
          model: {}
          policy_loss: -0.012346183881163597
          total_loss: 4.147040843963623
          vf_explained_var: 0.1704216748476028

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,235,4062.73,940000,126.708,234.078,-3.76656,960.06


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,235,4062.73,940000,126.708,234.078,-3.76656,960.06


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,235,4062.73,940000,126.708,234.078,-3.76656,960.06


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 944000
  counters:
    num_agent_steps_sampled: 944000
    num_agent_steps_trained: 944000
    num_env_steps_sampled: 944000
    num_env_steps_trained: 944000
  custom_metrics: {}
  date: 2022-07-23_22-17-36
  done: false
  episode_len_mean: 960.06
  episode_media: {}
  episode_reward_max: 234.07790073110783
  episode_reward_mean: 125.96138020911798
  episode_reward_min: -3.7665588452728116
  episodes_this_iter: 4
  episodes_total: 1908
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8225740790367126
          entropy_coeff: 0.0
          kl: 0.01220228336751461
          model: {}
          policy_loss: -0.0056429095566272736
          total_loss: 4.736837387084961
          vf_explained_var: 0.234628260135650

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,236,4079.87,944000,125.961,234.078,-3.76656,960.06


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,236,4079.87,944000,125.961,234.078,-3.76656,960.06


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,236,4079.87,944000,125.961,234.078,-3.76656,960.06


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 948000
  counters:
    num_agent_steps_sampled: 948000
    num_agent_steps_trained: 948000
    num_env_steps_sampled: 948000
    num_env_steps_trained: 948000
  custom_metrics: {}
  date: 2022-07-23_22-17-53
  done: false
  episode_len_mean: 960.06
  episode_media: {}
  episode_reward_max: 234.07790073110783
  episode_reward_mean: 125.51237095065261
  episode_reward_min: -3.7665588452728116
  episodes_this_iter: 4
  episodes_total: 1912
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8453563451766968
          entropy_coeff: 0.0
          kl: 0.018278811126947403
          model: {}
          policy_loss: -0.014538019895553589
          total_loss: 4.11322546005249
          vf_explained_var: 0.1879886537790298

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,237,4097.05,948000,125.512,234.078,-3.76656,960.06


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,237,4097.05,948000,125.512,234.078,-3.76656,960.06


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,237,4097.05,948000,125.512,234.078,-3.76656,960.06


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 952000
  counters:
    num_agent_steps_sampled: 952000
    num_agent_steps_trained: 952000
    num_env_steps_sampled: 952000
    num_env_steps_trained: 952000
  custom_metrics: {}
  date: 2022-07-23_22-18-11
  done: false
  episode_len_mean: 960.06
  episode_media: {}
  episode_reward_max: 234.07790073110783
  episode_reward_mean: 124.70802932284649
  episode_reward_min: -3.7665588452728116
  episodes_this_iter: 4
  episodes_total: 1916
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8339248299598694
          entropy_coeff: 0.0
          kl: 0.007428342010825872
          model: {}
          policy_loss: -0.0008563136798329651
          total_loss: 4.623056411743164
          vf_explained_var: 0.18065144121646

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,238,4114.4,952000,124.708,234.078,-3.76656,960.06


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,238,4114.4,952000,124.708,234.078,-3.76656,960.06


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,238,4114.4,952000,124.708,234.078,-3.76656,960.06


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 956000
  counters:
    num_agent_steps_sampled: 956000
    num_agent_steps_trained: 956000
    num_env_steps_sampled: 956000
    num_env_steps_trained: 956000
  custom_metrics: {}
  date: 2022-07-23_22-18-28
  done: false
  episode_len_mean: 960.06
  episode_media: {}
  episode_reward_max: 234.07790073110783
  episode_reward_mean: 124.21023282571555
  episode_reward_min: -3.7665588452728116
  episodes_this_iter: 4
  episodes_total: 1920
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8135197758674622
          entropy_coeff: 0.0
          kl: 0.0010087285190820694
          model: {}
          policy_loss: -0.006632224656641483
          total_loss: 3.8803656101226807
          vf_explained_var: 0.1938319653272

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,239,4131.51,956000,124.21,234.078,-3.76656,960.06


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,239,4131.51,956000,124.21,234.078,-3.76656,960.06


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,239,4131.51,956000,124.21,234.078,-3.76656,960.06


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 960000
  counters:
    num_agent_steps_sampled: 960000
    num_agent_steps_trained: 960000
    num_env_steps_sampled: 960000
    num_env_steps_trained: 960000
  custom_metrics: {}
  date: 2022-07-23_22-18-45
  done: false
  episode_len_mean: 960.06
  episode_media: {}
  episode_reward_max: 234.07790073110783
  episode_reward_mean: 123.30618983641104
  episode_reward_min: -3.7665588452728116
  episodes_this_iter: 4
  episodes_total: 1924
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8268868923187256
          entropy_coeff: 0.0
          kl: 0.000376261945348233
          model: {}
          policy_loss: -0.008712639100849628
          total_loss: 3.979790449142456
          vf_explained_var: 0.170520350337028

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,240,4149.01,960000,123.306,234.078,-3.76656,960.06


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,240,4149.01,960000,123.306,234.078,-3.76656,960.06


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,240,4149.01,960000,123.306,234.078,-3.76656,960.06


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 964000
  counters:
    num_agent_steps_sampled: 964000
    num_agent_steps_trained: 964000
    num_env_steps_sampled: 964000
    num_env_steps_trained: 964000
  custom_metrics: {}
  date: 2022-07-23_22-19-03
  done: false
  episode_len_mean: 983.27
  episode_media: {}
  episode_reward_max: 234.07790073110783
  episode_reward_mean: 126.89131553627158
  episode_reward_min: 32.07067599190748
  episodes_this_iter: 4
  episodes_total: 1928
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8317902684211731
          entropy_coeff: 0.0
          kl: 0.003135271370410919
          model: {}
          policy_loss: -0.00979159027338028
          total_loss: 4.236452102661133
          vf_explained_var: 0.19610363245010376


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,241,4166.39,964000,126.891,234.078,32.0707,983.27


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,241,4166.39,964000,126.891,234.078,32.0707,983.27


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,241,4166.39,964000,126.891,234.078,32.0707,983.27


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 968000
  counters:
    num_agent_steps_sampled: 968000
    num_agent_steps_trained: 968000
    num_env_steps_sampled: 968000
    num_env_steps_trained: 968000
  custom_metrics: {}
  date: 2022-07-23_22-19-21
  done: false
  episode_len_mean: 988.9
  episode_media: {}
  episode_reward_max: 234.07790073110783
  episode_reward_mean: 125.58154109068177
  episode_reward_min: -21.669043261838922
  episodes_this_iter: 4
  episodes_total: 1932
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8597216010093689
          entropy_coeff: 0.0
          kl: 0.0004738985444419086
          model: {}
          policy_loss: -0.006269802805036306
          total_loss: 4.250588893890381
          vf_explained_var: 0.326788336038589

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,242,4184.66,968000,125.582,234.078,-21.669,988.9


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,242,4184.66,968000,125.582,234.078,-21.669,988.9


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,242,4184.66,968000,125.582,234.078,-21.669,988.9


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 972000
  counters:
    num_agent_steps_sampled: 972000
    num_agent_steps_trained: 972000
    num_env_steps_sampled: 972000
    num_env_steps_trained: 972000
  custom_metrics: {}
  date: 2022-07-23_22-19-38
  done: false
  episode_len_mean: 983.66
  episode_media: {}
  episode_reward_max: 234.07790073110783
  episode_reward_mean: 122.82278314504092
  episode_reward_min: -35.42150148614293
  episodes_this_iter: 4
  episodes_total: 1936
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8690067529678345
          entropy_coeff: 0.0
          kl: 0.007902844808995724
          model: {}
          policy_loss: -0.003281767712906003
          total_loss: 5.550717830657959
          vf_explained_var: 0.2336781769990921

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,243,4201.95,972000,122.823,234.078,-35.4215,983.66


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,243,4201.95,972000,122.823,234.078,-35.4215,983.66


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,243,4201.95,972000,122.823,234.078,-35.4215,983.66


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 976000
  counters:
    num_agent_steps_sampled: 976000
    num_agent_steps_trained: 976000
    num_env_steps_sampled: 976000
    num_env_steps_trained: 976000
  custom_metrics: {}
  date: 2022-07-23_22-19-56
  done: false
  episode_len_mean: 983.66
  episode_media: {}
  episode_reward_max: 234.07790073110783
  episode_reward_mean: 122.12890170001121
  episode_reward_min: -35.42150148614293
  episodes_this_iter: 4
  episodes_total: 1940
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8257157802581787
          entropy_coeff: 0.0
          kl: 0.009120320901274681
          model: {}
          policy_loss: -0.00712697533890605
          total_loss: 5.043874263763428
          vf_explained_var: 0.33363574743270874

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,244,4219.3,976000,122.129,234.078,-35.4215,983.66


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,244,4219.3,976000,122.129,234.078,-35.4215,983.66


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,244,4219.3,976000,122.129,234.078,-35.4215,983.66


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 980000
  counters:
    num_agent_steps_sampled: 980000
    num_agent_steps_trained: 980000
    num_env_steps_sampled: 980000
    num_env_steps_trained: 980000
  custom_metrics: {}
  date: 2022-07-23_22-20-14
  done: false
  episode_len_mean: 983.66
  episode_media: {}
  episode_reward_max: 234.07790073110783
  episode_reward_mean: 121.88808094810058
  episode_reward_min: -35.42150148614293
  episodes_this_iter: 4
  episodes_total: 1944
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8066828846931458
          entropy_coeff: 0.0
          kl: 0.00978001207113266
          model: {}
          policy_loss: -0.006638443563133478
          total_loss: 4.214081764221191
          vf_explained_var: 0.24774512648582458

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,245,4236.88,980000,121.888,234.078,-35.4215,983.66


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,245,4236.88,980000,121.888,234.078,-35.4215,983.66


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,245,4236.88,980000,121.888,234.078,-35.4215,983.66


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 984000
  counters:
    num_agent_steps_sampled: 984000
    num_agent_steps_trained: 984000
    num_env_steps_sampled: 984000
    num_env_steps_trained: 984000
  custom_metrics: {}
  date: 2022-07-23_22-20-32
  done: false
  episode_len_mean: 983.66
  episode_media: {}
  episode_reward_max: 234.07790073110783
  episode_reward_mean: 121.6173975585712
  episode_reward_min: -35.42150148614293
  episodes_this_iter: 4
  episodes_total: 1948
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9004917144775391
          entropy_coeff: 0.0
          kl: 0.004102208651602268
          model: {}
          policy_loss: -0.004152374807745218
          total_loss: 4.708655834197998
          vf_explained_var: 0.28010961413383484

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,246,4255.28,984000,121.617,234.078,-35.4215,983.66


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,246,4255.28,984000,121.617,234.078,-35.4215,983.66


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,246,4255.28,984000,121.617,234.078,-35.4215,983.66


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 988000
  counters:
    num_agent_steps_sampled: 988000
    num_agent_steps_trained: 988000
    num_env_steps_sampled: 988000
    num_env_steps_trained: 988000
  custom_metrics: {}
  date: 2022-07-23_22-20-49
  done: false
  episode_len_mean: 983.66
  episode_media: {}
  episode_reward_max: 234.07790073110783
  episode_reward_mean: 120.70354445904401
  episode_reward_min: -35.42150148614293
  episodes_this_iter: 4
  episodes_total: 1952
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.877832293510437
          entropy_coeff: 0.0
          kl: 0.0071085914969444275
          model: {}
          policy_loss: -0.006619048770517111
          total_loss: 4.579796314239502
          vf_explained_var: 0.3097229599952698

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,247,4272.51,988000,120.704,234.078,-35.4215,983.66


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,247,4272.51,988000,120.704,234.078,-35.4215,983.66


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,247,4272.51,988000,120.704,234.078,-35.4215,983.66


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 992000
  counters:
    num_agent_steps_sampled: 992000
    num_agent_steps_trained: 992000
    num_env_steps_sampled: 992000
    num_env_steps_trained: 992000
  custom_metrics: {}
  date: 2022-07-23_22-21-07
  done: false
  episode_len_mean: 983.66
  episode_media: {}
  episode_reward_max: 234.07790073110783
  episode_reward_mean: 119.26471665968313
  episode_reward_min: -35.42150148614293
  episodes_this_iter: 4
  episodes_total: 1956
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7922725677490234
          entropy_coeff: 0.0
          kl: 0.010637380182743073
          model: {}
          policy_loss: -0.008756081573665142
          total_loss: 4.305927276611328
          vf_explained_var: 0.2739664018154144

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,248,4290.02,992000,119.265,234.078,-35.4215,983.66


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,248,4290.02,992000,119.265,234.078,-35.4215,983.66


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,248,4290.02,992000,119.265,234.078,-35.4215,983.66


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 996000
  counters:
    num_agent_steps_sampled: 996000
    num_agent_steps_trained: 996000
    num_env_steps_sampled: 996000
    num_env_steps_trained: 996000
  custom_metrics: {}
  date: 2022-07-23_22-21-25
  done: false
  episode_len_mean: 983.66
  episode_media: {}
  episode_reward_max: 234.07790073110783
  episode_reward_mean: 119.58453392543082
  episode_reward_min: -35.42150148614293
  episodes_this_iter: 4
  episodes_total: 1960
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.858569860458374
          entropy_coeff: 0.0
          kl: 0.017682291567325592
          model: {}
          policy_loss: -0.008544716984033585
          total_loss: 4.84279203414917
          vf_explained_var: 0.27766677737236023


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,249,4307.76,996000,119.585,234.078,-35.4215,983.66


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,249,4307.76,996000,119.585,234.078,-35.4215,983.66


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,249,4307.76,996000,119.585,234.078,-35.4215,983.66


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1000000
  counters:
    num_agent_steps_sampled: 1000000
    num_agent_steps_trained: 1000000
    num_env_steps_sampled: 1000000
    num_env_steps_trained: 1000000
  custom_metrics: {}
  date: 2022-07-23_22-21-43
  done: false
  episode_len_mean: 983.66
  episode_media: {}
  episode_reward_max: 234.07790073110783
  episode_reward_mean: 119.16123529994202
  episode_reward_min: -35.42150148614293
  episodes_this_iter: 4
  episodes_total: 1964
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8615998029708862
          entropy_coeff: 0.0
          kl: 0.0037623969838023186
          model: {}
          policy_loss: -0.007012917194515467
          total_loss: 3.966499090194702
          vf_explained_var: 0.2818855643

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,250,4325.99,1000000,119.161,234.078,-35.4215,983.66


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,250,4325.99,1000000,119.161,234.078,-35.4215,983.66


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,250,4325.99,1000000,119.161,234.078,-35.4215,983.66


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1004000
  counters:
    num_agent_steps_sampled: 1004000
    num_agent_steps_trained: 1004000
    num_env_steps_sampled: 1004000
    num_env_steps_trained: 1004000
  custom_metrics: {}
  date: 2022-07-23_22-22-01
  done: false
  episode_len_mean: 988.22
  episode_media: {}
  episode_reward_max: 174.41047776632928
  episode_reward_mean: 118.21778029993422
  episode_reward_min: -35.42150148614293
  episodes_this_iter: 4
  episodes_total: 1968
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8756096959114075
          entropy_coeff: 0.0
          kl: 0.0015824035508558154
          model: {}
          policy_loss: -0.004301318433135748
          total_loss: 4.441586017608643
          vf_explained_var: 0.3317798972

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,251,4343.79,1004000,118.218,174.41,-35.4215,988.22


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,251,4343.79,1004000,118.218,174.41,-35.4215,988.22


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,251,4343.79,1004000,118.218,174.41,-35.4215,988.22


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1008000
  counters:
    num_agent_steps_sampled: 1008000
    num_agent_steps_trained: 1008000
    num_env_steps_sampled: 1008000
    num_env_steps_trained: 1008000
  custom_metrics: {}
  date: 2022-07-23_22-22-20
  done: false
  episode_len_mean: 988.22
  episode_media: {}
  episode_reward_max: 174.41047776632928
  episode_reward_mean: 118.79746465633922
  episode_reward_min: -35.42150148614293
  episodes_this_iter: 4
  episodes_total: 1972
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8362451791763306
          entropy_coeff: 0.0
          kl: 0.004338169004768133
          model: {}
          policy_loss: -0.00548029039055109
          total_loss: 3.5394129753112793
          vf_explained_var: 0.22554416954

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,252,4363.4,1008000,118.797,174.41,-35.4215,988.22


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,252,4363.4,1008000,118.797,174.41,-35.4215,988.22


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,252,4363.4,1008000,118.797,174.41,-35.4215,988.22


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1012000
  counters:
    num_agent_steps_sampled: 1012000
    num_agent_steps_trained: 1012000
    num_env_steps_sampled: 1012000
    num_env_steps_trained: 1012000
  custom_metrics: {}
  date: 2022-07-23_22-22-40
  done: false
  episode_len_mean: 988.22
  episode_media: {}
  episode_reward_max: 174.41047776632928
  episode_reward_mean: 119.57094179910592
  episode_reward_min: -35.42150148614293
  episodes_this_iter: 4
  episodes_total: 1976
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8486189842224121
          entropy_coeff: 0.0
          kl: 0.018296198919415474
          model: {}
          policy_loss: -0.00872656051069498
          total_loss: 4.4221038818359375
          vf_explained_var: 0.30873137712

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,253,4382.99,1012000,119.571,174.41,-35.4215,988.22


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,253,4382.99,1012000,119.571,174.41,-35.4215,988.22


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,253,4382.99,1012000,119.571,174.41,-35.4215,988.22


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,253,4382.99,1012000,119.571,174.41,-35.4215,988.22


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1016000
  counters:
    num_agent_steps_sampled: 1016000
    num_agent_steps_trained: 1016000
    num_env_steps_sampled: 1016000
    num_env_steps_trained: 1016000
  custom_metrics: {}
  date: 2022-07-23_22-23-01
  done: false
  episode_len_mean: 988.22
  episode_media: {}
  episode_reward_max: 174.41047776632928
  episode_reward_mean: 119.0692546213918
  episode_reward_min: -35.42150148614293
  episodes_this_iter: 4
  episodes_total: 1980
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8605148196220398
          entropy_coeff: 0.0
          kl: 0.0080466503277421
          model: {}
          policy_loss: -0.005807248875498772
          total_loss: 4.117985248565674
          vf_explained_var: 0.28282687067985

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,254,4403.34,1016000,119.069,174.41,-35.4215,988.22


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,254,4403.34,1016000,119.069,174.41,-35.4215,988.22


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,254,4403.34,1016000,119.069,174.41,-35.4215,988.22


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1020000
  counters:
    num_agent_steps_sampled: 1020000
    num_agent_steps_trained: 1020000
    num_env_steps_sampled: 1020000
    num_env_steps_trained: 1020000
  custom_metrics: {}
  date: 2022-07-23_22-23-21
  done: false
  episode_len_mean: 988.22
  episode_media: {}
  episode_reward_max: 174.41047776632928
  episode_reward_mean: 120.06163523809714
  episode_reward_min: -35.42150148614293
  episodes_this_iter: 4
  episodes_total: 1984
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8694878220558167
          entropy_coeff: 0.0
          kl: 0.019560126587748528
          model: {}
          policy_loss: -0.010284998454153538
          total_loss: 4.107911109924316
          vf_explained_var: 0.25974702835

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,255,4423.35,1020000,120.062,174.41,-35.4215,988.22


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,255,4423.35,1020000,120.062,174.41,-35.4215,988.22


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,255,4423.35,1020000,120.062,174.41,-35.4215,988.22


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,255,4423.35,1020000,120.062,174.41,-35.4215,988.22


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1024000
  counters:
    num_agent_steps_sampled: 1024000
    num_agent_steps_trained: 1024000
    num_env_steps_sampled: 1024000
    num_env_steps_trained: 1024000
  custom_metrics: {}
  date: 2022-07-23_22-23-42
  done: false
  episode_len_mean: 988.22
  episode_media: {}
  episode_reward_max: 174.41047776632928
  episode_reward_mean: 120.35884331734121
  episode_reward_min: -35.42150148614293
  episodes_this_iter: 4
  episodes_total: 1988
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8615373373031616
          entropy_coeff: 0.0
          kl: 0.0030605203937739134
          model: {}
          policy_loss: -0.008471560664474964
          total_loss: 3.8749351501464844
          vf_explained_var: 0.301914483

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,256,4444.16,1024000,120.359,174.41,-35.4215,988.22


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,256,4444.16,1024000,120.359,174.41,-35.4215,988.22


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,256,4444.16,1024000,120.359,174.41,-35.4215,988.22


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1028000
  counters:
    num_agent_steps_sampled: 1028000
    num_agent_steps_trained: 1028000
    num_env_steps_sampled: 1028000
    num_env_steps_trained: 1028000
  custom_metrics: {}
  date: 2022-07-23_22-24-02
  done: false
  episode_len_mean: 988.22
  episode_media: {}
  episode_reward_max: 174.41047776632928
  episode_reward_mean: 120.52754996789255
  episode_reward_min: -35.42150148614293
  episodes_this_iter: 4
  episodes_total: 1992
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.869752049446106
          entropy_coeff: 0.0
          kl: 0.006266978569328785
          model: {}
          policy_loss: -0.011609633453190327
          total_loss: 4.497551441192627
          vf_explained_var: 0.324199974536

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,257,4464.14,1028000,120.528,174.41,-35.4215,988.22


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,257,4464.14,1028000,120.528,174.41,-35.4215,988.22


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,257,4464.14,1028000,120.528,174.41,-35.4215,988.22


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1032000
  counters:
    num_agent_steps_sampled: 1032000
    num_agent_steps_trained: 1032000
    num_env_steps_sampled: 1032000
    num_env_steps_trained: 1032000
  custom_metrics: {}
  date: 2022-07-23_22-24-21
  done: false
  episode_len_mean: 988.22
  episode_media: {}
  episode_reward_max: 174.41047776632928
  episode_reward_mean: 121.07878294034174
  episode_reward_min: -35.42150148614293
  episodes_this_iter: 4
  episodes_total: 1996
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8066297173500061
          entropy_coeff: 0.0
          kl: 0.009110264480113983
          model: {}
          policy_loss: -0.008589574135839939
          total_loss: 4.18864631652832
          vf_explained_var: 0.318237960338

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,258,4483.83,1032000,121.079,174.41,-35.4215,988.22


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,258,4483.83,1032000,121.079,174.41,-35.4215,988.22


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,258,4483.83,1032000,121.079,174.41,-35.4215,988.22


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,258,4483.83,1032000,121.079,174.41,-35.4215,988.22


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1036000
  counters:
    num_agent_steps_sampled: 1036000
    num_agent_steps_trained: 1036000
    num_env_steps_sampled: 1036000
    num_env_steps_trained: 1036000
  custom_metrics: {}
  date: 2022-07-23_22-24-42
  done: false
  episode_len_mean: 985.69
  episode_media: {}
  episode_reward_max: 227.39592345059611
  episode_reward_mean: 121.82627301077083
  episode_reward_min: -35.42150148614293
  episodes_this_iter: 5
  episodes_total: 2001
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.834187388420105
          entropy_coeff: 0.0
          kl: 0.007035411428660154
          model: {}
          policy_loss: 0.0037001450546085835
          total_loss: 4.2757110595703125
          vf_explained_var: 0.26173275709

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,259,4504.65,1036000,121.826,227.396,-35.4215,985.69


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,259,4504.65,1036000,121.826,227.396,-35.4215,985.69


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,259,4504.65,1036000,121.826,227.396,-35.4215,985.69


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,259,4504.65,1036000,121.826,227.396,-35.4215,985.69


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1040000
  counters:
    num_agent_steps_sampled: 1040000
    num_agent_steps_trained: 1040000
    num_env_steps_sampled: 1040000
    num_env_steps_trained: 1040000
  custom_metrics: {}
  date: 2022-07-23_22-25-03
  done: false
  episode_len_mean: 979.95
  episode_media: {}
  episode_reward_max: 251.19528533342321
  episode_reward_mean: 122.20705260305724
  episode_reward_min: -35.42150148614293
  episodes_this_iter: 5
  episodes_total: 2006
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8666698336601257
          entropy_coeff: 0.0
          kl: 0.0008746060775592923
          model: {}
          policy_loss: -0.011523823253810406
          total_loss: 5.516748905181885
          vf_explained_var: 0.3182416260

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,260,4525.03,1040000,122.207,251.195,-35.4215,979.95


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,260,4525.03,1040000,122.207,251.195,-35.4215,979.95


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,260,4525.03,1040000,122.207,251.195,-35.4215,979.95


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,260,4525.03,1040000,122.207,251.195,-35.4215,979.95


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1044000
  counters:
    num_agent_steps_sampled: 1044000
    num_agent_steps_trained: 1044000
    num_env_steps_sampled: 1044000
    num_env_steps_trained: 1044000
  custom_metrics: {}
  date: 2022-07-23_22-25-23
  done: false
  episode_len_mean: 979.95
  episode_media: {}
  episode_reward_max: 251.19528533342321
  episode_reward_mean: 122.90094206476152
  episode_reward_min: -35.42150148614293
  episodes_this_iter: 4
  episodes_total: 2010
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8408735394477844
          entropy_coeff: 0.0
          kl: 0.003981545567512512
          model: {}
          policy_loss: -0.012817330658435822
          total_loss: 4.832733631134033
          vf_explained_var: 0.38016626238

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,261,4545.48,1044000,122.901,251.195,-35.4215,979.95


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,261,4545.48,1044000,122.901,251.195,-35.4215,979.95


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,261,4545.48,1044000,122.901,251.195,-35.4215,979.95


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,261,4545.48,1044000,122.901,251.195,-35.4215,979.95


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1048000
  counters:
    num_agent_steps_sampled: 1048000
    num_agent_steps_trained: 1048000
    num_env_steps_sampled: 1048000
    num_env_steps_trained: 1048000
  custom_metrics: {}
  date: 2022-07-23_22-25-44
  done: false
  episode_len_mean: 979.95
  episode_media: {}
  episode_reward_max: 251.19528533342321
  episode_reward_mean: 122.66440613828917
  episode_reward_min: -35.42150148614293
  episodes_this_iter: 4
  episodes_total: 2014
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8983810544013977
          entropy_coeff: 0.0
          kl: 0.0024776591453701258
          model: {}
          policy_loss: -0.013221818953752518
          total_loss: 3.939589738845825
          vf_explained_var: 0.4280766248

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,262,4566.02,1048000,122.664,251.195,-35.4215,979.95


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,262,4566.02,1048000,122.664,251.195,-35.4215,979.95


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,262,4566.02,1048000,122.664,251.195,-35.4215,979.95


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,262,4566.02,1048000,122.664,251.195,-35.4215,979.95


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1052000
  counters:
    num_agent_steps_sampled: 1052000
    num_agent_steps_trained: 1052000
    num_env_steps_sampled: 1052000
    num_env_steps_trained: 1052000
  custom_metrics: {}
  date: 2022-07-23_22-26-04
  done: false
  episode_len_mean: 979.95
  episode_media: {}
  episode_reward_max: 251.19528533342321
  episode_reward_mean: 122.2930761180278
  episode_reward_min: -35.42150148614293
  episodes_this_iter: 4
  episodes_total: 2018
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8503677248954773
          entropy_coeff: 0.0
          kl: 0.0012899257708340883
          model: {}
          policy_loss: -0.013031099922955036
          total_loss: 3.5082507133483887
          vf_explained_var: 0.3710541427

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,263,4586.44,1052000,122.293,251.195,-35.4215,979.95


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,263,4586.44,1052000,122.293,251.195,-35.4215,979.95


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,263,4586.44,1052000,122.293,251.195,-35.4215,979.95


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1056000
  counters:
    num_agent_steps_sampled: 1056000
    num_agent_steps_trained: 1056000
    num_env_steps_sampled: 1056000
    num_env_steps_trained: 1056000
  custom_metrics: {}
  date: 2022-07-23_22-26-23
  done: false
  episode_len_mean: 979.95
  episode_media: {}
  episode_reward_max: 251.19528533342321
  episode_reward_mean: 122.84933170317574
  episode_reward_min: -35.42150148614293
  episodes_this_iter: 4
  episodes_total: 2022
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8340613842010498
          entropy_coeff: 0.0
          kl: 0.008845965377986431
          model: {}
          policy_loss: -0.01066103670746088
          total_loss: 3.952362537384033
          vf_explained_var: 0.253721058368

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,264,4605.23,1056000,122.849,251.195,-35.4215,979.95


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,264,4605.23,1056000,122.849,251.195,-35.4215,979.95


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,264,4605.23,1056000,122.849,251.195,-35.4215,979.95


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1060000
  counters:
    num_agent_steps_sampled: 1060000
    num_agent_steps_trained: 1060000
    num_env_steps_sampled: 1060000
    num_env_steps_trained: 1060000
  custom_metrics: {}
  date: 2022-07-23_22-26-42
  done: false
  episode_len_mean: 979.95
  episode_media: {}
  episode_reward_max: 251.19528533342321
  episode_reward_mean: 123.83629222516221
  episode_reward_min: -35.42150148614293
  episodes_this_iter: 4
  episodes_total: 2026
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8563536405563354
          entropy_coeff: 0.0
          kl: 0.0019156485795974731
          model: {}
          policy_loss: -0.009100290946662426
          total_loss: 4.2549967765808105
          vf_explained_var: 0.379346787

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,265,4624.41,1060000,123.836,251.195,-35.4215,979.95


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,265,4624.41,1060000,123.836,251.195,-35.4215,979.95


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,265,4624.41,1060000,123.836,251.195,-35.4215,979.95


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1064000
  counters:
    num_agent_steps_sampled: 1064000
    num_agent_steps_trained: 1064000
    num_env_steps_sampled: 1064000
    num_env_steps_trained: 1064000
  custom_metrics: {}
  date: 2022-07-23_22-27-01
  done: false
  episode_len_mean: 986.49
  episode_media: {}
  episode_reward_max: 251.19528533342321
  episode_reward_mean: 126.13764993126551
  episode_reward_min: -35.42150148614293
  episodes_this_iter: 4
  episodes_total: 2030
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8773850798606873
          entropy_coeff: 0.0
          kl: 0.0031382811721414328
          model: {}
          policy_loss: -0.004165551159530878
          total_loss: 4.025060176849365
          vf_explained_var: 0.3796920180

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,266,4643.34,1064000,126.138,251.195,-35.4215,986.49


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,266,4643.34,1064000,126.138,251.195,-35.4215,986.49


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,266,4643.34,1064000,126.138,251.195,-35.4215,986.49


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1068000
  counters:
    num_agent_steps_sampled: 1068000
    num_agent_steps_trained: 1068000
    num_env_steps_sampled: 1068000
    num_env_steps_trained: 1068000
  custom_metrics: {}
  date: 2022-07-23_22-27-20
  done: false
  episode_len_mean: 986.49
  episode_media: {}
  episode_reward_max: 251.19528533342321
  episode_reward_mean: 125.31508983684652
  episode_reward_min: -35.42150148614293
  episodes_this_iter: 4
  episodes_total: 2034
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8787153959274292
          entropy_coeff: 0.0
          kl: 0.007885877974331379
          model: {}
          policy_loss: -0.007815511897206306
          total_loss: 3.889163017272949
          vf_explained_var: 0.40760564804

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,267,4662.35,1068000,125.315,251.195,-35.4215,986.49


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,267,4662.35,1068000,125.315,251.195,-35.4215,986.49


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,267,4662.35,1068000,125.315,251.195,-35.4215,986.49


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1072000
  counters:
    num_agent_steps_sampled: 1072000
    num_agent_steps_trained: 1072000
    num_env_steps_sampled: 1072000
    num_env_steps_trained: 1072000
  custom_metrics: {}
  date: 2022-07-23_22-27-39
  done: false
  episode_len_mean: 991.73
  episode_media: {}
  episode_reward_max: 251.19528533342321
  episode_reward_mean: 127.35391409840155
  episode_reward_min: 76.28286514569885
  episodes_this_iter: 4
  episodes_total: 2038
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8975011110305786
          entropy_coeff: 0.0
          kl: 0.0078103747218847275
          model: {}
          policy_loss: -0.007802617270499468
          total_loss: 4.757366180419922
          vf_explained_var: 0.45862874388

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,268,4680.54,1072000,127.354,251.195,76.2829,991.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,268,4680.54,1072000,127.354,251.195,76.2829,991.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,268,4680.54,1072000,127.354,251.195,76.2829,991.73


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1076000
  counters:
    num_agent_steps_sampled: 1076000
    num_agent_steps_trained: 1076000
    num_env_steps_sampled: 1076000
    num_env_steps_trained: 1076000
  custom_metrics: {}
  date: 2022-07-23_22-27-58
  done: false
  episode_len_mean: 991.73
  episode_media: {}
  episode_reward_max: 251.19528533342321
  episode_reward_mean: 126.61199530898097
  episode_reward_min: 76.28286514569885
  episodes_this_iter: 4
  episodes_total: 2042
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9382302165031433
          entropy_coeff: 0.0
          kl: 0.005115128587931395
          model: {}
          policy_loss: -0.014451632276177406
          total_loss: 4.546940326690674
          vf_explained_var: 0.335244864225

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,269,4699.36,1076000,126.612,251.195,76.2829,991.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,269,4699.36,1076000,126.612,251.195,76.2829,991.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,269,4699.36,1076000,126.612,251.195,76.2829,991.73


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1080000
  counters:
    num_agent_steps_sampled: 1080000
    num_agent_steps_trained: 1080000
    num_env_steps_sampled: 1080000
    num_env_steps_trained: 1080000
  custom_metrics: {}
  date: 2022-07-23_22-28-17
  done: false
  episode_len_mean: 991.73
  episode_media: {}
  episode_reward_max: 251.19528533342321
  episode_reward_mean: 126.3645977734213
  episode_reward_min: 76.28286514569885
  episodes_this_iter: 4
  episodes_total: 2046
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9188294410705566
          entropy_coeff: 0.0
          kl: 0.015054309740662575
          model: {}
          policy_loss: -0.01711861975491047
          total_loss: 4.342431545257568
          vf_explained_var: 0.33241018652915

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,270,4718.62,1080000,126.365,251.195,76.2829,991.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,270,4718.62,1080000,126.365,251.195,76.2829,991.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,270,4718.62,1080000,126.365,251.195,76.2829,991.73


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1084000
  counters:
    num_agent_steps_sampled: 1084000
    num_agent_steps_trained: 1084000
    num_env_steps_sampled: 1084000
    num_env_steps_trained: 1084000
  custom_metrics: {}
  date: 2022-07-23_22-28-35
  done: false
  episode_len_mean: 991.73
  episode_media: {}
  episode_reward_max: 251.19528533342321
  episode_reward_mean: 127.06533923806408
  episode_reward_min: 77.12516436498247
  episodes_this_iter: 4
  episodes_total: 2050
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8737657070159912
          entropy_coeff: 0.0
          kl: 0.007776808459311724
          model: {}
          policy_loss: -0.009776185266673565
          total_loss: 3.802402973175049
          vf_explained_var: 0.434322506189

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,271,4736.76,1084000,127.065,251.195,77.1252,991.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,271,4736.76,1084000,127.065,251.195,77.1252,991.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,271,4736.76,1084000,127.065,251.195,77.1252,991.73


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1088000
  counters:
    num_agent_steps_sampled: 1088000
    num_agent_steps_trained: 1088000
    num_env_steps_sampled: 1088000
    num_env_steps_trained: 1088000
  custom_metrics: {}
  date: 2022-07-23_22-28-53
  done: false
  episode_len_mean: 991.73
  episode_media: {}
  episode_reward_max: 251.19528533342321
  episode_reward_mean: 127.80111161612136
  episode_reward_min: 80.28655050805126
  episodes_this_iter: 4
  episodes_total: 2054
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9380154609680176
          entropy_coeff: 0.0
          kl: 0.01056820247322321
          model: {}
          policy_loss: -0.013388475403189659
          total_loss: 3.7257487773895264
          vf_explained_var: 0.366782248020

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,272,4754.36,1088000,127.801,251.195,80.2866,991.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,272,4754.36,1088000,127.801,251.195,80.2866,991.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,272,4754.36,1088000,127.801,251.195,80.2866,991.73


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1092000
  counters:
    num_agent_steps_sampled: 1092000
    num_agent_steps_trained: 1092000
    num_env_steps_sampled: 1092000
    num_env_steps_trained: 1092000
  custom_metrics: {}
  date: 2022-07-23_22-29-11
  done: false
  episode_len_mean: 991.73
  episode_media: {}
  episode_reward_max: 251.19528533342321
  episode_reward_mean: 128.71804423629487
  episode_reward_min: 80.28655050805126
  episodes_this_iter: 4
  episodes_total: 2058
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9312064051628113
          entropy_coeff: 0.0
          kl: 0.004718081094324589
          model: {}
          policy_loss: -0.004208606202155352
          total_loss: 3.795936346054077
          vf_explained_var: 0.382118731737

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,273,4772.76,1092000,128.718,251.195,80.2866,991.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,273,4772.76,1092000,128.718,251.195,80.2866,991.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,273,4772.76,1092000,128.718,251.195,80.2866,991.73


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1096000
  counters:
    num_agent_steps_sampled: 1096000
    num_agent_steps_trained: 1096000
    num_env_steps_sampled: 1096000
    num_env_steps_trained: 1096000
  custom_metrics: {}
  date: 2022-07-23_22-29-29
  done: false
  episode_len_mean: 991.73
  episode_media: {}
  episode_reward_max: 251.19528533342321
  episode_reward_mean: 128.19274820843205
  episode_reward_min: 80.28655050805126
  episodes_this_iter: 4
  episodes_total: 2062
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9077315330505371
          entropy_coeff: 0.0
          kl: 0.00044333646656014025
          model: {}
          policy_loss: 0.00032975481008179486
          total_loss: 4.341154098510742
          vf_explained_var: 0.431667447

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,274,4790.9,1096000,128.193,251.195,80.2866,991.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,274,4790.9,1096000,128.193,251.195,80.2866,991.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,274,4790.9,1096000,128.193,251.195,80.2866,991.73


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1100000
  counters:
    num_agent_steps_sampled: 1100000
    num_agent_steps_trained: 1100000
    num_env_steps_sampled: 1100000
    num_env_steps_trained: 1100000
  custom_metrics: {}
  date: 2022-07-23_22-29-47
  done: false
  episode_len_mean: 991.73
  episode_media: {}
  episode_reward_max: 251.19528533342321
  episode_reward_mean: 128.64278607010786
  episode_reward_min: 80.28655050805126
  episodes_this_iter: 4
  episodes_total: 2066
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.891235888004303
          entropy_coeff: 0.0
          kl: 0.006603453308343887
          model: {}
          policy_loss: -0.01063790637999773
          total_loss: 4.125003814697266
          vf_explained_var: 0.46313729882240

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,275,4808.32,1100000,128.643,251.195,80.2866,991.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,275,4808.32,1100000,128.643,251.195,80.2866,991.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,275,4808.32,1100000,128.643,251.195,80.2866,991.73


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1104000
  counters:
    num_agent_steps_sampled: 1104000
    num_agent_steps_trained: 1104000
    num_env_steps_sampled: 1104000
    num_env_steps_trained: 1104000
  custom_metrics: {}
  date: 2022-07-23_22-30-05
  done: false
  episode_len_mean: 991.73
  episode_media: {}
  episode_reward_max: 251.19528533342321
  episode_reward_mean: 128.749650833185
  episode_reward_min: 80.28655050805126
  episodes_this_iter: 4
  episodes_total: 2070
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9556792974472046
          entropy_coeff: 0.0
          kl: 0.012982646934688091
          model: {}
          policy_loss: -0.0049528502859175205
          total_loss: 4.19635009765625
          vf_explained_var: 0.46637588739395

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,276,4826.4,1104000,128.75,251.195,80.2866,991.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,276,4826.4,1104000,128.75,251.195,80.2866,991.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,276,4826.4,1104000,128.75,251.195,80.2866,991.73


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1108000
  counters:
    num_agent_steps_sampled: 1108000
    num_agent_steps_trained: 1108000
    num_env_steps_sampled: 1108000
    num_env_steps_trained: 1108000
  custom_metrics: {}
  date: 2022-07-23_22-30-23
  done: false
  episode_len_mean: 991.73
  episode_media: {}
  episode_reward_max: 251.19528533342321
  episode_reward_mean: 129.06940549125483
  episode_reward_min: 80.28655050805126
  episodes_this_iter: 4
  episodes_total: 2074
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.935694694519043
          entropy_coeff: 0.0
          kl: 0.00726044736802578
          model: {}
          policy_loss: -0.010592923499643803
          total_loss: 3.8476641178131104
          vf_explained_var: 0.4284653663635

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,277,4844.36,1108000,129.069,251.195,80.2866,991.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,277,4844.36,1108000,129.069,251.195,80.2866,991.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,277,4844.36,1108000,129.069,251.195,80.2866,991.73


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1112000
  counters:
    num_agent_steps_sampled: 1112000
    num_agent_steps_trained: 1112000
    num_env_steps_sampled: 1112000
    num_env_steps_trained: 1112000
  custom_metrics: {}
  date: 2022-07-23_22-30-41
  done: false
  episode_len_mean: 991.73
  episode_media: {}
  episode_reward_max: 251.19528533342321
  episode_reward_mean: 129.60943045350533
  episode_reward_min: 80.28655050805126
  episodes_this_iter: 4
  episodes_total: 2078
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9476720094680786
          entropy_coeff: 0.0
          kl: 0.002102118683978915
          model: {}
          policy_loss: -0.0006830633501522243
          total_loss: 3.714625120162964
          vf_explained_var: 0.40546488761

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,278,4862.18,1112000,129.609,251.195,80.2866,991.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,278,4862.18,1112000,129.609,251.195,80.2866,991.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,278,4862.18,1112000,129.609,251.195,80.2866,991.73


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1116000
  counters:
    num_agent_steps_sampled: 1116000
    num_agent_steps_trained: 1116000
    num_env_steps_sampled: 1116000
    num_env_steps_trained: 1116000
  custom_metrics: {}
  date: 2022-07-23_22-30-59
  done: false
  episode_len_mean: 991.73
  episode_media: {}
  episode_reward_max: 251.19528533342321
  episode_reward_mean: 129.61427568996567
  episode_reward_min: 80.28655050805126
  episodes_this_iter: 4
  episodes_total: 2082
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9629116058349609
          entropy_coeff: 0.0
          kl: 0.001208891742862761
          model: {}
          policy_loss: 0.004491153638809919
          total_loss: 3.9538018703460693
          vf_explained_var: 0.464737117290

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,279,4879.92,1116000,129.614,251.195,80.2866,991.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,279,4879.92,1116000,129.614,251.195,80.2866,991.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,279,4879.92,1116000,129.614,251.195,80.2866,991.73


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1120000
  counters:
    num_agent_steps_sampled: 1120000
    num_agent_steps_trained: 1120000
    num_env_steps_sampled: 1120000
    num_env_steps_trained: 1120000
  custom_metrics: {}
  date: 2022-07-23_22-31-16
  done: false
  episode_len_mean: 991.73
  episode_media: {}
  episode_reward_max: 251.19528533342321
  episode_reward_mean: 129.80874968129754
  episode_reward_min: 80.28655050805126
  episodes_this_iter: 4
  episodes_total: 2086
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9435794353485107
          entropy_coeff: 0.0
          kl: 0.005837355274707079
          model: {}
          policy_loss: -0.013874699361622334
          total_loss: 4.209340572357178
          vf_explained_var: 0.441996067762

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,280,4897.45,1120000,129.809,251.195,80.2866,991.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,280,4897.45,1120000,129.809,251.195,80.2866,991.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,280,4897.45,1120000,129.809,251.195,80.2866,991.73


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1124000
  counters:
    num_agent_steps_sampled: 1124000
    num_agent_steps_trained: 1124000
    num_env_steps_sampled: 1124000
    num_env_steps_trained: 1124000
  custom_metrics: {}
  date: 2022-07-23_22-31-34
  done: false
  episode_len_mean: 991.73
  episode_media: {}
  episode_reward_max: 251.19528533342321
  episode_reward_mean: 129.855750495718
  episode_reward_min: 80.28655050805126
  episodes_this_iter: 4
  episodes_total: 2090
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9744679927825928
          entropy_coeff: 0.0
          kl: 0.0027560091111809015
          model: {}
          policy_loss: -0.00951029360294342
          total_loss: 3.707235813140869
          vf_explained_var: 0.43590673804283

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,281,4915.21,1124000,129.856,251.195,80.2866,991.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,281,4915.21,1124000,129.856,251.195,80.2866,991.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,281,4915.21,1124000,129.856,251.195,80.2866,991.73


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1128000
  counters:
    num_agent_steps_sampled: 1128000
    num_agent_steps_trained: 1128000
    num_env_steps_sampled: 1128000
    num_env_steps_trained: 1128000
  custom_metrics: {}
  date: 2022-07-23_22-31-52
  done: false
  episode_len_mean: 991.73
  episode_media: {}
  episode_reward_max: 251.19528533342321
  episode_reward_mean: 130.44724033995263
  episode_reward_min: 80.28655050805126
  episodes_this_iter: 4
  episodes_total: 2094
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9694015383720398
          entropy_coeff: 0.0
          kl: 0.004198296461254358
          model: {}
          policy_loss: -0.007845280691981316
          total_loss: 3.6032538414001465
          vf_explained_var: 0.36320504546

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,282,4932.86,1128000,130.447,251.195,80.2866,991.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,282,4932.86,1128000,130.447,251.195,80.2866,991.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,282,4932.86,1128000,130.447,251.195,80.2866,991.73


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1132000
  counters:
    num_agent_steps_sampled: 1132000
    num_agent_steps_trained: 1132000
    num_env_steps_sampled: 1132000
    num_env_steps_trained: 1132000
  custom_metrics: {}
  date: 2022-07-23_22-32-09
  done: false
  episode_len_mean: 985.15
  episode_media: {}
  episode_reward_max: 251.19528533342321
  episode_reward_mean: 128.2365801586081
  episode_reward_min: 10.561922275909922
  episodes_this_iter: 5
  episodes_total: 2099
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9295026659965515
          entropy_coeff: 0.0
          kl: 0.008951558731496334
          model: {}
          policy_loss: -0.004792848601937294
          total_loss: 3.678809642791748
          vf_explained_var: 0.434480279684

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,283,4949.88,1132000,128.237,251.195,10.5619,985.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,283,4949.88,1132000,128.237,251.195,10.5619,985.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,283,4949.88,1132000,128.237,251.195,10.5619,985.15


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1136000
  counters:
    num_agent_steps_sampled: 1136000
    num_agent_steps_trained: 1136000
    num_env_steps_sampled: 1136000
    num_env_steps_trained: 1136000
  custom_metrics: {}
  date: 2022-07-23_22-32-26
  done: false
  episode_len_mean: 987.15
  episode_media: {}
  episode_reward_max: 170.15058997394425
  episode_reward_mean: 124.48209688285466
  episode_reward_min: 6.287567269964214
  episodes_this_iter: 4
  episodes_total: 2103
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9319729208946228
          entropy_coeff: 0.0
          kl: 0.005713169928640127
          model: {}
          policy_loss: -0.0018462635343894362
          total_loss: 5.176207065582275
          vf_explained_var: 0.31196793913

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,284,4967.08,1136000,124.482,170.151,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,284,4967.08,1136000,124.482,170.151,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,284,4967.08,1136000,124.482,170.151,6.28757,987.15


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1140000
  counters:
    num_agent_steps_sampled: 1140000
    num_agent_steps_trained: 1140000
    num_env_steps_sampled: 1140000
    num_env_steps_trained: 1140000
  custom_metrics: {}
  date: 2022-07-23_22-32-44
  done: false
  episode_len_mean: 987.15
  episode_media: {}
  episode_reward_max: 170.15058997394425
  episode_reward_mean: 123.772636909999
  episode_reward_min: 6.287567269964214
  episodes_this_iter: 4
  episodes_total: 2107
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9497113823890686
          entropy_coeff: 0.0
          kl: 0.0029357746243476868
          model: {}
          policy_loss: -0.0038225590251386166
          total_loss: 4.503314018249512
          vf_explained_var: 0.449109196662

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,285,4984.77,1140000,123.773,170.151,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,285,4984.77,1140000,123.773,170.151,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,285,4984.77,1140000,123.773,170.151,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,285,4984.77,1140000,123.773,170.151,6.28757,987.15


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1144000
  counters:
    num_agent_steps_sampled: 1144000
    num_agent_steps_trained: 1144000
    num_env_steps_sampled: 1144000
    num_env_steps_trained: 1144000
  custom_metrics: {}
  date: 2022-07-23_22-33-05
  done: false
  episode_len_mean: 987.15
  episode_media: {}
  episode_reward_max: 170.15058997394425
  episode_reward_mean: 123.83828822242386
  episode_reward_min: 6.287567269964214
  episodes_this_iter: 4
  episodes_total: 2111
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9518249034881592
          entropy_coeff: 0.0
          kl: 0.002426960738375783
          model: {}
          policy_loss: -0.0051365508697927
          total_loss: 3.8323094844818115
          vf_explained_var: 0.4938310086727

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,286,5005.88,1144000,123.838,170.151,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,286,5005.88,1144000,123.838,170.151,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,286,5005.88,1144000,123.838,170.151,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,286,5005.88,1144000,123.838,170.151,6.28757,987.15


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1148000
  counters:
    num_agent_steps_sampled: 1148000
    num_agent_steps_trained: 1148000
    num_env_steps_sampled: 1148000
    num_env_steps_trained: 1148000
  custom_metrics: {}
  date: 2022-07-23_22-33-26
  done: false
  episode_len_mean: 987.15
  episode_media: {}
  episode_reward_max: 170.15058997394425
  episode_reward_mean: 124.48776546453732
  episode_reward_min: 6.287567269964214
  episodes_this_iter: 4
  episodes_total: 2115
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9325143098831177
          entropy_coeff: 0.0
          kl: 0.01488115731626749
          model: {}
          policy_loss: -0.0052454047836363316
          total_loss: 3.7845876216888428
          vf_explained_var: 0.52630805969

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,287,5026.92,1148000,124.488,170.151,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,287,5026.92,1148000,124.488,170.151,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,287,5026.92,1148000,124.488,170.151,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,287,5026.92,1148000,124.488,170.151,6.28757,987.15


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1152000
  counters:
    num_agent_steps_sampled: 1152000
    num_agent_steps_trained: 1152000
    num_env_steps_sampled: 1152000
    num_env_steps_trained: 1152000
  custom_metrics: {}
  date: 2022-07-23_22-33-48
  done: false
  episode_len_mean: 987.15
  episode_media: {}
  episode_reward_max: 170.15058997394425
  episode_reward_mean: 124.14034851742315
  episode_reward_min: 6.287567269964214
  episodes_this_iter: 4
  episodes_total: 2119
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9191846251487732
          entropy_coeff: 0.0
          kl: 0.01392326783388853
          model: {}
          policy_loss: -0.005996790248900652
          total_loss: 3.487964153289795
          vf_explained_var: 0.4363620579242

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,288,5048.6,1152000,124.14,170.151,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,288,5048.6,1152000,124.14,170.151,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,288,5048.6,1152000,124.14,170.151,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,288,5048.6,1152000,124.14,170.151,6.28757,987.15


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1156000
  counters:
    num_agent_steps_sampled: 1156000
    num_agent_steps_trained: 1156000
    num_env_steps_sampled: 1156000
    num_env_steps_trained: 1156000
  custom_metrics: {}
  date: 2022-07-23_22-34-09
  done: false
  episode_len_mean: 987.15
  episode_media: {}
  episode_reward_max: 168.93185527005832
  episode_reward_mean: 123.94617500412352
  episode_reward_min: 6.287567269964214
  episodes_this_iter: 4
  episodes_total: 2123
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8619532585144043
          entropy_coeff: 0.0
          kl: 0.007309141103178263
          model: {}
          policy_loss: -0.005887971725314856
          total_loss: 3.8733456134796143
          vf_explained_var: 0.39468461275

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,289,5069.83,1156000,123.946,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,289,5069.83,1156000,123.946,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,289,5069.83,1156000,123.946,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,289,5069.83,1156000,123.946,168.932,6.28757,987.15


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1160000
  counters:
    num_agent_steps_sampled: 1160000
    num_agent_steps_trained: 1160000
    num_env_steps_sampled: 1160000
    num_env_steps_trained: 1160000
  custom_metrics: {}
  date: 2022-07-23_22-34-30
  done: false
  episode_len_mean: 987.15
  episode_media: {}
  episode_reward_max: 168.93185527005832
  episode_reward_mean: 123.41874870873231
  episode_reward_min: 6.287567269964214
  episodes_this_iter: 4
  episodes_total: 2127
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9363248944282532
          entropy_coeff: 0.0
          kl: 0.009831976145505905
          model: {}
          policy_loss: -0.003808399196714163
          total_loss: 4.118809700012207
          vf_explained_var: 0.479683846235

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,290,5090.7,1160000,123.419,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,290,5090.7,1160000,123.419,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,290,5090.7,1160000,123.419,168.932,6.28757,987.15


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1164000
  counters:
    num_agent_steps_sampled: 1164000
    num_agent_steps_trained: 1164000
    num_env_steps_sampled: 1164000
    num_env_steps_trained: 1164000
  custom_metrics: {}
  date: 2022-07-23_22-34-50
  done: false
  episode_len_mean: 987.15
  episode_media: {}
  episode_reward_max: 168.93185527005832
  episode_reward_mean: 123.13222888005724
  episode_reward_min: 6.287567269964214
  episodes_this_iter: 4
  episodes_total: 2131
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9000812768936157
          entropy_coeff: 0.0
          kl: 0.00310154864564538
          model: {}
          policy_loss: -0.003879216965287924
          total_loss: 4.36104679107666
          vf_explained_var: 0.49147254228591

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,291,5110.92,1164000,123.132,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,291,5110.92,1164000,123.132,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,291,5110.92,1164000,123.132,168.932,6.28757,987.15


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1168000
  counters:
    num_agent_steps_sampled: 1168000
    num_agent_steps_trained: 1168000
    num_env_steps_sampled: 1168000
    num_env_steps_trained: 1168000
  custom_metrics: {}
  date: 2022-07-23_22-35-10
  done: false
  episode_len_mean: 987.15
  episode_media: {}
  episode_reward_max: 168.93185527005832
  episode_reward_mean: 123.68271127400857
  episode_reward_min: 6.287567269964214
  episodes_this_iter: 4
  episodes_total: 2135
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8714547157287598
          entropy_coeff: 0.0
          kl: 0.0083850072696805
          model: {}
          policy_loss: -0.005347184371203184
          total_loss: 4.3889617919921875
          vf_explained_var: 0.5023192763328

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,292,5130.38,1168000,123.683,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,292,5130.38,1168000,123.683,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,292,5130.38,1168000,123.683,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,292,5130.38,1168000,123.683,168.932,6.28757,987.15


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1172000
  counters:
    num_agent_steps_sampled: 1172000
    num_agent_steps_trained: 1172000
    num_env_steps_sampled: 1172000
    num_env_steps_trained: 1172000
  custom_metrics: {}
  date: 2022-07-23_22-35-30
  done: false
  episode_len_mean: 987.15
  episode_media: {}
  episode_reward_max: 168.93185527005832
  episode_reward_mean: 123.8165703466999
  episode_reward_min: 6.287567269964214
  episodes_this_iter: 4
  episodes_total: 2139
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9198941588401794
          entropy_coeff: 0.0
          kl: 0.0024472700897604227
          model: {}
          policy_loss: -0.010824513621628284
          total_loss: 4.636332988739014
          vf_explained_var: 0.538683235645

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,293,5150.76,1172000,123.817,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,293,5150.76,1172000,123.817,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,293,5150.76,1172000,123.817,168.932,6.28757,987.15


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1176000
  counters:
    num_agent_steps_sampled: 1176000
    num_agent_steps_trained: 1176000
    num_env_steps_sampled: 1176000
    num_env_steps_trained: 1176000
  custom_metrics: {}
  date: 2022-07-23_22-35-50
  done: false
  episode_len_mean: 987.15
  episode_media: {}
  episode_reward_max: 168.93185527005832
  episode_reward_mean: 124.81287061556992
  episode_reward_min: 6.287567269964214
  episodes_this_iter: 4
  episodes_total: 2143
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.931384801864624
          entropy_coeff: 0.0
          kl: 0.019970590248703957
          model: {}
          policy_loss: -0.006568179465830326
          total_loss: 4.743047714233398
          vf_explained_var: 0.5799658894538

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,294,5170.71,1176000,124.813,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,294,5170.71,1176000,124.813,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,294,5170.71,1176000,124.813,168.932,6.28757,987.15


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1180000
  counters:
    num_agent_steps_sampled: 1180000
    num_agent_steps_trained: 1180000
    num_env_steps_sampled: 1180000
    num_env_steps_trained: 1180000
  custom_metrics: {}
  date: 2022-07-23_22-36-10
  done: false
  episode_len_mean: 987.15
  episode_media: {}
  episode_reward_max: 168.93185527005832
  episode_reward_mean: 125.26318239376911
  episode_reward_min: 6.287567269964214
  episodes_this_iter: 4
  episodes_total: 2147
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9366280436515808
          entropy_coeff: 0.0
          kl: 0.003840739605948329
          model: {}
          policy_loss: -0.005643773823976517
          total_loss: 3.261364698410034
          vf_explained_var: 0.536997199058

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,295,5190.82,1180000,125.263,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,295,5190.82,1180000,125.263,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,295,5190.82,1180000,125.263,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,295,5190.82,1180000,125.263,168.932,6.28757,987.15


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1184000
  counters:
    num_agent_steps_sampled: 1184000
    num_agent_steps_trained: 1184000
    num_env_steps_sampled: 1184000
    num_env_steps_trained: 1184000
  custom_metrics: {}
  date: 2022-07-23_22-36-31
  done: false
  episode_len_mean: 987.15
  episode_media: {}
  episode_reward_max: 168.93185527005832
  episode_reward_mean: 125.07216605593132
  episode_reward_min: 6.287567269964214
  episodes_this_iter: 4
  episodes_total: 2151
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9283389449119568
          entropy_coeff: 0.0
          kl: 0.010454106144607067
          model: {}
          policy_loss: -0.006408295594155788
          total_loss: 3.614551305770874
          vf_explained_var: 0.579327940940

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,296,5211.79,1184000,125.072,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,296,5211.79,1184000,125.072,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,296,5211.79,1184000,125.072,168.932,6.28757,987.15


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1188000
  counters:
    num_agent_steps_sampled: 1188000
    num_agent_steps_trained: 1188000
    num_env_steps_sampled: 1188000
    num_env_steps_trained: 1188000
  custom_metrics: {}
  date: 2022-07-23_22-36-51
  done: false
  episode_len_mean: 987.15
  episode_media: {}
  episode_reward_max: 168.93185527005832
  episode_reward_mean: 125.10244218215705
  episode_reward_min: 6.287567269964214
  episodes_this_iter: 4
  episodes_total: 2155
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9255050420761108
          entropy_coeff: 0.0
          kl: 0.002790869912132621
          model: {}
          policy_loss: -0.005419133231043816
          total_loss: 3.715383529663086
          vf_explained_var: 0.564437091350

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,297,5231.08,1188000,125.102,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,297,5231.08,1188000,125.102,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,297,5231.08,1188000,125.102,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,297,5231.08,1188000,125.102,168.932,6.28757,987.15


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1192000
  counters:
    num_agent_steps_sampled: 1192000
    num_agent_steps_trained: 1192000
    num_env_steps_sampled: 1192000
    num_env_steps_trained: 1192000
  custom_metrics: {}
  date: 2022-07-23_22-37-11
  done: false
  episode_len_mean: 987.15
  episode_media: {}
  episode_reward_max: 168.93185527005832
  episode_reward_mean: 125.32567089046026
  episode_reward_min: 6.287567269964214
  episodes_this_iter: 4
  episodes_total: 2159
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9902321696281433
          entropy_coeff: 0.0
          kl: 0.011402362026274204
          model: {}
          policy_loss: -0.01231426652520895
          total_loss: 4.600813388824463
          vf_explained_var: 0.4840882718563

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,298,5251.53,1192000,125.326,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,298,5251.53,1192000,125.326,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,298,5251.53,1192000,125.326,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,298,5251.53,1192000,125.326,168.932,6.28757,987.15


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1196000
  counters:
    num_agent_steps_sampled: 1196000
    num_agent_steps_trained: 1196000
    num_env_steps_sampled: 1196000
    num_env_steps_trained: 1196000
  custom_metrics: {}
  date: 2022-07-23_22-37-32
  done: false
  episode_len_mean: 987.15
  episode_media: {}
  episode_reward_max: 168.93185527005832
  episode_reward_mean: 124.26774374728197
  episode_reward_min: 6.287567269964214
  episodes_this_iter: 4
  episodes_total: 2163
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9792582392692566
          entropy_coeff: 0.0
          kl: 0.007336289621889591
          model: {}
          policy_loss: -0.004876576829701662
          total_loss: 4.057775020599365
          vf_explained_var: 0.583525776863

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,299,5272.26,1196000,124.268,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,299,5272.26,1196000,124.268,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,299,5272.26,1196000,124.268,168.932,6.28757,987.15


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1200000
  counters:
    num_agent_steps_sampled: 1200000
    num_agent_steps_trained: 1200000
    num_env_steps_sampled: 1200000
    num_env_steps_trained: 1200000
  custom_metrics: {}
  date: 2022-07-23_22-37-50
  done: false
  episode_len_mean: 987.15
  episode_media: {}
  episode_reward_max: 168.93185527005832
  episode_reward_mean: 125.19588951775785
  episode_reward_min: 6.287567269964214
  episodes_this_iter: 4
  episodes_total: 2167
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9670753479003906
          entropy_coeff: 0.0
          kl: 0.006059620063751936
          model: {}
          policy_loss: -0.008744647726416588
          total_loss: 3.368256092071533
          vf_explained_var: 0.519081056118

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,300,5290.57,1200000,125.196,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,300,5290.57,1200000,125.196,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,300,5290.57,1200000,125.196,168.932,6.28757,987.15


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1204000
  counters:
    num_agent_steps_sampled: 1204000
    num_agent_steps_trained: 1204000
    num_env_steps_sampled: 1204000
    num_env_steps_trained: 1204000
  custom_metrics: {}
  date: 2022-07-23_22-38-09
  done: false
  episode_len_mean: 987.15
  episode_media: {}
  episode_reward_max: 168.93185527005832
  episode_reward_mean: 125.35140452942237
  episode_reward_min: 6.287567269964214
  episodes_this_iter: 4
  episodes_total: 2171
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9648245573043823
          entropy_coeff: 0.0
          kl: 0.006607405375689268
          model: {}
          policy_loss: -0.006190479267388582
          total_loss: 3.1948962211608887
          vf_explained_var: 0.53037875890

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,301,5308.91,1204000,125.351,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,301,5308.91,1204000,125.351,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,301,5308.91,1204000,125.351,168.932,6.28757,987.15


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1208000
  counters:
    num_agent_steps_sampled: 1208000
    num_agent_steps_trained: 1208000
    num_env_steps_sampled: 1208000
    num_env_steps_trained: 1208000
  custom_metrics: {}
  date: 2022-07-23_22-38-27
  done: false
  episode_len_mean: 987.15
  episode_media: {}
  episode_reward_max: 168.93185527005832
  episode_reward_mean: 124.88019858078738
  episode_reward_min: 6.287567269964214
  episodes_this_iter: 4
  episodes_total: 2175
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9622852802276611
          entropy_coeff: 0.0
          kl: 0.003992205951362848
          model: {}
          policy_loss: -0.005410924553871155
          total_loss: 4.274735450744629
          vf_explained_var: 0.526663064956

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,302,5326.91,1208000,124.88,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,302,5326.91,1208000,124.88,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,302,5326.91,1208000,124.88,168.932,6.28757,987.15


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1212000
  counters:
    num_agent_steps_sampled: 1212000
    num_agent_steps_trained: 1212000
    num_env_steps_sampled: 1212000
    num_env_steps_trained: 1212000
  custom_metrics: {}
  date: 2022-07-23_22-38-44
  done: false
  episode_len_mean: 987.15
  episode_media: {}
  episode_reward_max: 168.93185527005832
  episode_reward_mean: 124.89796647640372
  episode_reward_min: 6.287567269964214
  episodes_this_iter: 4
  episodes_total: 2179
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9359520673751831
          entropy_coeff: 0.0
          kl: 0.013595223426818848
          model: {}
          policy_loss: -0.005789611022919416
          total_loss: 3.2333385944366455
          vf_explained_var: 0.36994504928

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,303,5344.3,1212000,124.898,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,303,5344.3,1212000,124.898,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,303,5344.3,1212000,124.898,168.932,6.28757,987.15


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1216000
  counters:
    num_agent_steps_sampled: 1216000
    num_agent_steps_trained: 1216000
    num_env_steps_sampled: 1216000
    num_env_steps_trained: 1216000
  custom_metrics: {}
  date: 2022-07-23_22-39-02
  done: false
  episode_len_mean: 987.15
  episode_media: {}
  episode_reward_max: 168.93185527005832
  episode_reward_mean: 125.43065553580482
  episode_reward_min: 6.287567269964214
  episodes_this_iter: 4
  episodes_total: 2183
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9377458691596985
          entropy_coeff: 0.0
          kl: 0.008761562407016754
          model: {}
          policy_loss: -0.004518591333180666
          total_loss: 3.4349751472473145
          vf_explained_var: 0.43298178911

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,304,5361.99,1216000,125.431,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,304,5361.99,1216000,125.431,168.932,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,304,5361.99,1216000,125.431,168.932,6.28757,987.15


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1220000
  counters:
    num_agent_steps_sampled: 1220000
    num_agent_steps_trained: 1220000
    num_env_steps_sampled: 1220000
    num_env_steps_trained: 1220000
  custom_metrics: {}
  date: 2022-07-23_22-39-20
  done: false
  episode_len_mean: 987.15
  episode_media: {}
  episode_reward_max: 166.05391844730755
  episode_reward_mean: 125.38561970202231
  episode_reward_min: 6.287567269964214
  episodes_this_iter: 4
  episodes_total: 2187
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9410814046859741
          entropy_coeff: 0.0
          kl: 0.008884917013347149
          model: {}
          policy_loss: -0.0057482412084937096
          total_loss: 3.4073433876037598
          vf_explained_var: 0.4754392504

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,305,5379.49,1220000,125.386,166.054,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,305,5379.49,1220000,125.386,166.054,6.28757,987.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,305,5379.49,1220000,125.386,166.054,6.28757,987.15


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1224000
  counters:
    num_agent_steps_sampled: 1224000
    num_agent_steps_trained: 1224000
    num_env_steps_sampled: 1224000
    num_env_steps_trained: 1224000
  custom_metrics: {}
  date: 2022-07-23_22-39-38
  done: false
  episode_len_mean: 979.89
  episode_media: {}
  episode_reward_max: 171.0890213301718
  episode_reward_mean: 125.01062071207032
  episode_reward_min: 6.287567269964214
  episodes_this_iter: 5
  episodes_total: 2192
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.911758542060852
          entropy_coeff: 0.0
          kl: 0.0015317648649215698
          model: {}
          policy_loss: -0.0036877079401165247
          total_loss: 4.292648792266846
          vf_explained_var: 0.321490585803

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,306,5397.34,1224000,125.011,171.089,6.28757,979.89


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,306,5397.34,1224000,125.011,171.089,6.28757,979.89


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,306,5397.34,1224000,125.011,171.089,6.28757,979.89


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1228000
  counters:
    num_agent_steps_sampled: 1228000
    num_agent_steps_trained: 1228000
    num_env_steps_sampled: 1228000
    num_env_steps_trained: 1228000
  custom_metrics: {}
  date: 2022-07-23_22-39-55
  done: false
  episode_len_mean: 979.89
  episode_media: {}
  episode_reward_max: 171.0890213301718
  episode_reward_mean: 125.4322001167693
  episode_reward_min: 6.287567269964214
  episodes_this_iter: 4
  episodes_total: 2196
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9312084317207336
          entropy_coeff: 0.0
          kl: 0.003269122913479805
          model: {}
          policy_loss: -0.00414264528080821
          total_loss: 3.1768808364868164
          vf_explained_var: 0.34981814026832

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,307,5415.2,1228000,125.432,171.089,6.28757,979.89


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,307,5415.2,1228000,125.432,171.089,6.28757,979.89


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,307,5415.2,1228000,125.432,171.089,6.28757,979.89


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1232000
  counters:
    num_agent_steps_sampled: 1232000
    num_agent_steps_trained: 1232000
    num_env_steps_sampled: 1232000
    num_env_steps_trained: 1232000
  custom_metrics: {}
  date: 2022-07-23_22-40-12
  done: false
  episode_len_mean: 986.47
  episode_media: {}
  episode_reward_max: 172.06606318686545
  episode_reward_mean: 127.83920872599752
  episode_reward_min: 6.287567269964214
  episodes_this_iter: 4
  episodes_total: 2200
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9183511137962341
          entropy_coeff: 0.0
          kl: 0.0021731483284384012
          model: {}
          policy_loss: -0.005738191772252321
          total_loss: 3.816256284713745
          vf_explained_var: 0.36178210377

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,308,5432.13,1232000,127.839,172.066,6.28757,986.47


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,308,5432.13,1232000,127.839,172.066,6.28757,986.47


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,308,5432.13,1232000,127.839,172.066,6.28757,986.47


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1236000
  counters:
    num_agent_steps_sampled: 1236000
    num_agent_steps_trained: 1236000
    num_env_steps_sampled: 1236000
    num_env_steps_trained: 1236000
  custom_metrics: {}
  date: 2022-07-23_22-40-30
  done: false
  episode_len_mean: 992.74
  episode_media: {}
  episode_reward_max: 172.06606318686545
  episode_reward_mean: 130.32222553954284
  episode_reward_min: 60.5661799785087
  episodes_this_iter: 4
  episodes_total: 2204
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9243690371513367
          entropy_coeff: 0.0
          kl: 0.004045992158353329
          model: {}
          policy_loss: -0.006895619910210371
          total_loss: 3.484145402908325
          vf_explained_var: 0.2260221391916

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,309,5449.21,1236000,130.322,172.066,60.5662,992.74


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,309,5449.21,1236000,130.322,172.066,60.5662,992.74


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,309,5449.21,1236000,130.322,172.066,60.5662,992.74


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1240000
  counters:
    num_agent_steps_sampled: 1240000
    num_agent_steps_trained: 1240000
    num_env_steps_sampled: 1240000
    num_env_steps_trained: 1240000
  custom_metrics: {}
  date: 2022-07-23_22-40-47
  done: false
  episode_len_mean: 992.74
  episode_media: {}
  episode_reward_max: 172.06606318686545
  episode_reward_mean: 131.11380578224163
  episode_reward_min: 60.5661799785087
  episodes_this_iter: 4
  episodes_total: 2208
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8995529413223267
          entropy_coeff: 0.0
          kl: 0.00124253798276186
          model: {}
          policy_loss: -0.008877784013748169
          total_loss: 3.422699213027954
          vf_explained_var: 0.33257529139518

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,310,5466.55,1240000,131.114,172.066,60.5662,992.74


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,310,5466.55,1240000,131.114,172.066,60.5662,992.74


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,310,5466.55,1240000,131.114,172.066,60.5662,992.74


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1244000
  counters:
    num_agent_steps_sampled: 1244000
    num_agent_steps_trained: 1244000
    num_env_steps_sampled: 1244000
    num_env_steps_trained: 1244000
  custom_metrics: {}
  date: 2022-07-23_22-41-05
  done: false
  episode_len_mean: 992.74
  episode_media: {}
  episode_reward_max: 172.06606318686545
  episode_reward_mean: 130.00353507632587
  episode_reward_min: 60.5661799785087
  episodes_this_iter: 4
  episodes_total: 2212
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8651202917098999
          entropy_coeff: 0.0
          kl: 0.004612908232957125
          model: {}
          policy_loss: -0.0058685108087956905
          total_loss: 3.751335620880127
          vf_explained_var: 0.322596848011

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,311,5484.5,1244000,130.004,172.066,60.5662,992.74


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,311,5484.5,1244000,130.004,172.066,60.5662,992.74


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,311,5484.5,1244000,130.004,172.066,60.5662,992.74


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1248000
  counters:
    num_agent_steps_sampled: 1248000
    num_agent_steps_trained: 1248000
    num_env_steps_sampled: 1248000
    num_env_steps_trained: 1248000
  custom_metrics: {}
  date: 2022-07-23_22-41-22
  done: false
  episode_len_mean: 984.15
  episode_media: {}
  episode_reward_max: 218.51502754244422
  episode_reward_mean: 130.47621816527894
  episode_reward_min: 7.426047744641593
  episodes_this_iter: 5
  episodes_total: 2217
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8607340455055237
          entropy_coeff: 0.0
          kl: 0.008684024214744568
          model: {}
          policy_loss: 0.0008283571805804968
          total_loss: 4.650366306304932
          vf_explained_var: 0.421775966882

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,312,5501.15,1248000,130.476,218.515,7.42605,984.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,312,5501.15,1248000,130.476,218.515,7.42605,984.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,312,5501.15,1248000,130.476,218.515,7.42605,984.15


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1252000
  counters:
    num_agent_steps_sampled: 1252000
    num_agent_steps_trained: 1252000
    num_env_steps_sampled: 1252000
    num_env_steps_trained: 1252000
  custom_metrics: {}
  date: 2022-07-23_22-41-39
  done: false
  episode_len_mean: 984.15
  episode_media: {}
  episode_reward_max: 218.51502754244422
  episode_reward_mean: 130.1217960748657
  episode_reward_min: 7.426047744641593
  episodes_this_iter: 4
  episodes_total: 2221
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9357361793518066
          entropy_coeff: 0.0
          kl: 0.006869091186672449
          model: {}
          policy_loss: 0.006398654077202082
          total_loss: 4.2988128662109375
          vf_explained_var: 0.4677754640579

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,313,5518.84,1252000,130.122,218.515,7.42605,984.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,313,5518.84,1252000,130.122,218.515,7.42605,984.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,313,5518.84,1252000,130.122,218.515,7.42605,984.15


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1256000
  counters:
    num_agent_steps_sampled: 1256000
    num_agent_steps_trained: 1256000
    num_env_steps_sampled: 1256000
    num_env_steps_trained: 1256000
  custom_metrics: {}
  date: 2022-07-23_22-41-57
  done: false
  episode_len_mean: 984.15
  episode_media: {}
  episode_reward_max: 218.51502754244422
  episode_reward_mean: 130.79219899134316
  episode_reward_min: 7.426047744641593
  episodes_this_iter: 4
  episodes_total: 2225
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9130144119262695
          entropy_coeff: 0.0
          kl: 0.0014845427358523011
          model: {}
          policy_loss: -0.00277300039306283
          total_loss: 3.194456100463867
          vf_explained_var: 0.465916395187

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,314,5536.28,1256000,130.792,218.515,7.42605,984.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,314,5536.28,1256000,130.792,218.515,7.42605,984.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,314,5536.28,1256000,130.792,218.515,7.42605,984.15


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1260000
  counters:
    num_agent_steps_sampled: 1260000
    num_agent_steps_trained: 1260000
    num_env_steps_sampled: 1260000
    num_env_steps_trained: 1260000
  custom_metrics: {}
  date: 2022-07-23_22-42-15
  done: false
  episode_len_mean: 976.97
  episode_media: {}
  episode_reward_max: 218.51502754244422
  episode_reward_mean: 129.74922203155575
  episode_reward_min: -6.091755071575804
  episodes_this_iter: 4
  episodes_total: 2229
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9527400732040405
          entropy_coeff: 0.0
          kl: 0.006217289716005325
          model: {}
          policy_loss: -0.007894461043179035
          total_loss: 4.5731048583984375
          vf_explained_var: 0.4382646977

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,315,5554.06,1260000,129.749,218.515,-6.09176,976.97


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,315,5554.06,1260000,129.749,218.515,-6.09176,976.97


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,315,5554.06,1260000,129.749,218.515,-6.09176,976.97


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1264000
  counters:
    num_agent_steps_sampled: 1264000
    num_agent_steps_trained: 1264000
    num_env_steps_sampled: 1264000
    num_env_steps_trained: 1264000
  custom_metrics: {}
  date: 2022-07-23_22-42-31
  done: false
  episode_len_mean: 973.59
  episode_media: {}
  episode_reward_max: 226.2084712933431
  episode_reward_mean: 131.40268487432073
  episode_reward_min: -6.091755071575804
  episodes_this_iter: 5
  episodes_total: 2234
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8908249139785767
          entropy_coeff: 0.0
          kl: 0.0026126569136977196
          model: {}
          policy_loss: 0.002128319814801216
          total_loss: 3.77522873878479
          vf_explained_var: 0.2671996951103

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,316,5570.46,1264000,131.403,226.208,-6.09176,973.59


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,316,5570.46,1264000,131.403,226.208,-6.09176,973.59


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,316,5570.46,1264000,131.403,226.208,-6.09176,973.59


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1268000
  counters:
    num_agent_steps_sampled: 1268000
    num_agent_steps_trained: 1268000
    num_env_steps_sampled: 1268000
    num_env_steps_trained: 1268000
  custom_metrics: {}
  date: 2022-07-23_22-42-49
  done: false
  episode_len_mean: 966.19
  episode_media: {}
  episode_reward_max: 226.2084712933431
  episode_reward_mean: 131.64796251366076
  episode_reward_min: -6.091755071575804
  episodes_this_iter: 4
  episodes_total: 2238
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9451099634170532
          entropy_coeff: 0.0
          kl: 0.0010683819418773055
          model: {}
          policy_loss: -0.0017491776961833239
          total_loss: 4.332082271575928
          vf_explained_var: 0.3292217552

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,317,5587.96,1268000,131.648,226.208,-6.09176,966.19


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,317,5587.96,1268000,131.648,226.208,-6.09176,966.19


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,317,5587.96,1268000,131.648,226.208,-6.09176,966.19


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1272000
  counters:
    num_agent_steps_sampled: 1272000
    num_agent_steps_trained: 1272000
    num_env_steps_sampled: 1272000
    num_env_steps_trained: 1272000
  custom_metrics: {}
  date: 2022-07-23_22-43-06
  done: false
  episode_len_mean: 966.19
  episode_media: {}
  episode_reward_max: 226.2084712933431
  episode_reward_mean: 130.90175430316904
  episode_reward_min: -6.091755071575804
  episodes_this_iter: 4
  episodes_total: 2242
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.988464891910553
          entropy_coeff: 0.0
          kl: 0.005380747839808464
          model: {}
          policy_loss: -0.0061035798862576485
          total_loss: 3.2307028770446777
          vf_explained_var: 0.47401455044

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,318,5605.53,1272000,130.902,226.208,-6.09176,966.19


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,318,5605.53,1272000,130.902,226.208,-6.09176,966.19


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,318,5605.53,1272000,130.902,226.208,-6.09176,966.19


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1276000
  counters:
    num_agent_steps_sampled: 1276000
    num_agent_steps_trained: 1276000
    num_env_steps_sampled: 1276000
    num_env_steps_trained: 1276000
  custom_metrics: {}
  date: 2022-07-23_22-43-24
  done: false
  episode_len_mean: 966.19
  episode_media: {}
  episode_reward_max: 226.2084712933431
  episode_reward_mean: 130.94265740221093
  episode_reward_min: -6.091755071575804
  episodes_this_iter: 4
  episodes_total: 2246
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9699339270591736
          entropy_coeff: 0.0
          kl: 0.002490295795723796
          model: {}
          policy_loss: -0.006518048234283924
          total_loss: 3.087394952774048
          vf_explained_var: 0.462797254323

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,319,5623.03,1276000,130.943,226.208,-6.09176,966.19


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,319,5623.03,1276000,130.943,226.208,-6.09176,966.19


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,319,5623.03,1276000,130.943,226.208,-6.09176,966.19


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1280000
  counters:
    num_agent_steps_sampled: 1280000
    num_agent_steps_trained: 1280000
    num_env_steps_sampled: 1280000
    num_env_steps_trained: 1280000
  custom_metrics: {}
  date: 2022-07-23_22-43-40
  done: false
  episode_len_mean: 951.56
  episode_media: {}
  episode_reward_max: 226.2084712933431
  episode_reward_mean: 129.7801773880815
  episode_reward_min: -6.091755071575804
  episodes_this_iter: 6
  episodes_total: 2252
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9231645464897156
          entropy_coeff: 0.0
          kl: 0.002076496137306094
          model: {}
          policy_loss: -0.002472394611686468
          total_loss: 3.645040273666382
          vf_explained_var: 0.4098304808139

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,320,5638.88,1280000,129.78,226.208,-6.09176,951.56


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,320,5638.88,1280000,129.78,226.208,-6.09176,951.56


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,320,5638.88,1280000,129.78,226.208,-6.09176,951.56


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1284000
  counters:
    num_agent_steps_sampled: 1284000
    num_agent_steps_trained: 1284000
    num_env_steps_sampled: 1284000
    num_env_steps_trained: 1284000
  custom_metrics: {}
  date: 2022-07-23_22-43-57
  done: false
  episode_len_mean: 951.56
  episode_media: {}
  episode_reward_max: 226.2084712933431
  episode_reward_mean: 130.73237661781386
  episode_reward_min: -6.091755071575804
  episodes_this_iter: 4
  episodes_total: 2256
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9634568691253662
          entropy_coeff: 0.0
          kl: 0.005256504751741886
          model: {}
          policy_loss: -0.005664147902280092
          total_loss: 3.3048155307769775
          vf_explained_var: 0.51256066560

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,321,5655.88,1284000,130.732,226.208,-6.09176,951.56


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,321,5655.88,1284000,130.732,226.208,-6.09176,951.56


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,321,5655.88,1284000,130.732,226.208,-6.09176,951.56


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,321,5655.88,1284000,130.732,226.208,-6.09176,951.56


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1288000
  counters:
    num_agent_steps_sampled: 1288000
    num_agent_steps_trained: 1288000
    num_env_steps_sampled: 1288000
    num_env_steps_trained: 1288000
  custom_metrics: {}
  date: 2022-07-23_22-44-18
  done: false
  episode_len_mean: 951.56
  episode_media: {}
  episode_reward_max: 226.2084712933431
  episode_reward_mean: 129.94927368361655
  episode_reward_min: -6.091755071575804
  episodes_this_iter: 4
  episodes_total: 2260
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.996326744556427
          entropy_coeff: 0.0
          kl: 0.01615268364548683
          model: {}
          policy_loss: -0.006787680089473724
          total_loss: 3.3421945571899414
          vf_explained_var: 0.4753352999687

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,322,5676.97,1288000,129.949,226.208,-6.09176,951.56


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,322,5676.97,1288000,129.949,226.208,-6.09176,951.56


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,322,5676.97,1288000,129.949,226.208,-6.09176,951.56


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1292000
  counters:
    num_agent_steps_sampled: 1292000
    num_agent_steps_trained: 1292000
    num_env_steps_sampled: 1292000
    num_env_steps_trained: 1292000
  custom_metrics: {}
  date: 2022-07-23_22-44-37
  done: false
  episode_len_mean: 951.56
  episode_media: {}
  episode_reward_max: 226.2084712933431
  episode_reward_mean: 131.1149145791745
  episode_reward_min: -6.091755071575804
  episodes_this_iter: 4
  episodes_total: 2264
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9537035822868347
          entropy_coeff: 0.0
          kl: 0.001988118514418602
          model: {}
          policy_loss: -0.005232600960880518
          total_loss: 2.7851755619049072
          vf_explained_var: 0.485525667667

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,323,5696.14,1292000,131.115,226.208,-6.09176,951.56


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,323,5696.14,1292000,131.115,226.208,-6.09176,951.56


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,323,5696.14,1292000,131.115,226.208,-6.09176,951.56


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1296000
  counters:
    num_agent_steps_sampled: 1296000
    num_agent_steps_trained: 1296000
    num_env_steps_sampled: 1296000
    num_env_steps_trained: 1296000
  custom_metrics: {}
  date: 2022-07-23_22-44-56
  done: false
  episode_len_mean: 951.56
  episode_media: {}
  episode_reward_max: 226.2084712933431
  episode_reward_mean: 131.32759124961424
  episode_reward_min: -6.091755071575804
  episodes_this_iter: 4
  episodes_total: 2268
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9657389521598816
          entropy_coeff: 0.0
          kl: 0.008894331753253937
          model: {}
          policy_loss: -0.0050540161319077015
          total_loss: 2.8339803218841553
          vf_explained_var: 0.3788156211

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,324,5714.76,1296000,131.328,226.208,-6.09176,951.56


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,324,5714.76,1296000,131.328,226.208,-6.09176,951.56


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,324,5714.76,1296000,131.328,226.208,-6.09176,951.56


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1300000
  counters:
    num_agent_steps_sampled: 1300000
    num_agent_steps_trained: 1300000
    num_env_steps_sampled: 1300000
    num_env_steps_trained: 1300000
  custom_metrics: {}
  date: 2022-07-23_22-45-15
  done: false
  episode_len_mean: 944.08
  episode_media: {}
  episode_reward_max: 226.2084712933431
  episode_reward_mean: 130.8674377763974
  episode_reward_min: -6.091755071575804
  episodes_this_iter: 4
  episodes_total: 2272
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.977988600730896
          entropy_coeff: 0.0
          kl: 0.0020286741200834513
          model: {}
          policy_loss: -0.003763908054679632
          total_loss: 3.4993913173675537
          vf_explained_var: 0.477531135082

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,325,5733.66,1300000,130.867,226.208,-6.09176,944.08


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,325,5733.66,1300000,130.867,226.208,-6.09176,944.08


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,325,5733.66,1300000,130.867,226.208,-6.09176,944.08


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1304000
  counters:
    num_agent_steps_sampled: 1304000
    num_agent_steps_trained: 1304000
    num_env_steps_sampled: 1304000
    num_env_steps_trained: 1304000
  custom_metrics: {}
  date: 2022-07-23_22-45-33
  done: false
  episode_len_mean: 944.08
  episode_media: {}
  episode_reward_max: 226.2084712933431
  episode_reward_mean: 130.89575138017233
  episode_reward_min: -6.091755071575804
  episodes_this_iter: 4
  episodes_total: 2276
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9479215145111084
          entropy_coeff: 0.0
          kl: 0.002077800454571843
          model: {}
          policy_loss: -0.006213036365807056
          total_loss: 3.765321969985962
          vf_explained_var: 0.459160536527

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,326,5751.84,1304000,130.896,226.208,-6.09176,944.08


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,326,5751.84,1304000,130.896,226.208,-6.09176,944.08


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,326,5751.84,1304000,130.896,226.208,-6.09176,944.08


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1308000
  counters:
    num_agent_steps_sampled: 1308000
    num_agent_steps_trained: 1308000
    num_env_steps_sampled: 1308000
    num_env_steps_trained: 1308000
  custom_metrics: {}
  date: 2022-07-23_22-45-52
  done: false
  episode_len_mean: 944.08
  episode_media: {}
  episode_reward_max: 226.2084712933431
  episode_reward_mean: 130.62383720541854
  episode_reward_min: -6.091755071575804
  episodes_this_iter: 4
  episodes_total: 2280
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 1.009422779083252
          entropy_coeff: 0.0
          kl: 0.024968449026346207
          model: {}
          policy_loss: -0.009525828994810581
          total_loss: 2.927124500274658
          vf_explained_var: 0.4174897372722

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,327,5770.23,1308000,130.624,226.208,-6.09176,944.08


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,327,5770.23,1308000,130.624,226.208,-6.09176,944.08


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,327,5770.23,1308000,130.624,226.208,-6.09176,944.08


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1312000
  counters:
    num_agent_steps_sampled: 1312000
    num_agent_steps_trained: 1312000
    num_env_steps_sampled: 1312000
    num_env_steps_trained: 1312000
  custom_metrics: {}
  date: 2022-07-23_22-46-10
  done: false
  episode_len_mean: 944.08
  episode_media: {}
  episode_reward_max: 226.2084712933431
  episode_reward_mean: 130.59545856791365
  episode_reward_min: -6.091755071575804
  episodes_this_iter: 4
  episodes_total: 2284
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9840579032897949
          entropy_coeff: 0.0
          kl: 0.002161614131182432
          model: {}
          policy_loss: -0.0038450255524367094
          total_loss: 4.387883186340332
          vf_explained_var: 0.48698902130

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,328,5788.78,1312000,130.595,226.208,-6.09176,944.08


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,328,5788.78,1312000,130.595,226.208,-6.09176,944.08


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,328,5788.78,1312000,130.595,226.208,-6.09176,944.08


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1316000
  counters:
    num_agent_steps_sampled: 1316000
    num_agent_steps_trained: 1316000
    num_env_steps_sampled: 1316000
    num_env_steps_trained: 1316000
  custom_metrics: {}
  date: 2022-07-23_22-46-28
  done: false
  episode_len_mean: 944.08
  episode_media: {}
  episode_reward_max: 226.2084712933431
  episode_reward_mean: 130.65289565916135
  episode_reward_min: -6.091755071575804
  episodes_this_iter: 4
  episodes_total: 2288
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9936845898628235
          entropy_coeff: 0.0
          kl: 0.0006401596474461257
          model: {}
          policy_loss: -0.004130131099373102
          total_loss: 3.8846895694732666
          vf_explained_var: 0.4204905629

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,329,5806.99,1316000,130.653,226.208,-6.09176,944.08


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,329,5806.99,1316000,130.653,226.208,-6.09176,944.08


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,329,5806.99,1316000,130.653,226.208,-6.09176,944.08


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1320000
  counters:
    num_agent_steps_sampled: 1320000
    num_agent_steps_trained: 1320000
    num_env_steps_sampled: 1320000
    num_env_steps_trained: 1320000
  custom_metrics: {}
  date: 2022-07-23_22-46-47
  done: false
  episode_len_mean: 943.8
  episode_media: {}
  episode_reward_max: 226.2084712933431
  episode_reward_mean: 131.67936673807972
  episode_reward_min: -6.091755071575804
  episodes_this_iter: 5
  episodes_total: 2293
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9584083557128906
          entropy_coeff: 0.0
          kl: 0.003264227183535695
          model: {}
          policy_loss: 0.009220805950462818
          total_loss: 3.5699265003204346
          vf_explained_var: 0.3971892893314

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,330,5825.54,1320000,131.679,226.208,-6.09176,943.8


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,330,5825.54,1320000,131.679,226.208,-6.09176,943.8


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,330,5825.54,1320000,131.679,226.208,-6.09176,943.8


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1324000
  counters:
    num_agent_steps_sampled: 1324000
    num_agent_steps_trained: 1324000
    num_env_steps_sampled: 1324000
    num_env_steps_trained: 1324000
  custom_metrics: {}
  date: 2022-07-23_22-47-06
  done: false
  episode_len_mean: 943.8
  episode_media: {}
  episode_reward_max: 226.2084712933431
  episode_reward_mean: 131.31978576655712
  episode_reward_min: -6.091755071575804
  episodes_this_iter: 4
  episodes_total: 2297
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9011334180831909
          entropy_coeff: 0.0
          kl: 0.0007492317818105221
          model: {}
          policy_loss: 0.0004361379542388022
          total_loss: 3.281282663345337
          vf_explained_var: 0.479909837245

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,331,5844.49,1324000,131.32,226.208,-6.09176,943.8


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,331,5844.49,1324000,131.32,226.208,-6.09176,943.8


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,331,5844.49,1324000,131.32,226.208,-6.09176,943.8


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1328000
  counters:
    num_agent_steps_sampled: 1328000
    num_agent_steps_trained: 1328000
    num_env_steps_sampled: 1328000
    num_env_steps_trained: 1328000
  custom_metrics: {}
  date: 2022-07-23_22-47-25
  done: false
  episode_len_mean: 928.77
  episode_media: {}
  episode_reward_max: 226.2084712933431
  episode_reward_mean: 128.8212689437926
  episode_reward_min: -38.89360582998917
  episodes_this_iter: 6
  episodes_total: 2303
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9472265243530273
          entropy_coeff: 0.0
          kl: 0.00490726251155138
          model: {}
          policy_loss: -0.002387743443250656
          total_loss: 4.923369407653809
          vf_explained_var: 0.32997947931289

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,332,5863.5,1328000,128.821,226.208,-38.8936,928.77


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,332,5863.5,1328000,128.821,226.208,-38.8936,928.77


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,332,5863.5,1328000,128.821,226.208,-38.8936,928.77


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1332000
  counters:
    num_agent_steps_sampled: 1332000
    num_agent_steps_trained: 1332000
    num_env_steps_sampled: 1332000
    num_env_steps_trained: 1332000
  custom_metrics: {}
  date: 2022-07-23_22-47-44
  done: false
  episode_len_mean: 928.77
  episode_media: {}
  episode_reward_max: 226.2084712933431
  episode_reward_mean: 129.1975112104085
  episode_reward_min: -38.89360582998917
  episodes_this_iter: 4
  episodes_total: 2307
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9862689971923828
          entropy_coeff: 0.0
          kl: 0.002840878441929817
          model: {}
          policy_loss: -0.008144018240272999
          total_loss: 3.42524790763855
          vf_explained_var: 0.38930499553680

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,333,5882.21,1332000,129.198,226.208,-38.8936,928.77


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,333,5882.21,1332000,129.198,226.208,-38.8936,928.77


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,333,5882.21,1332000,129.198,226.208,-38.8936,928.77


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1336000
  counters:
    num_agent_steps_sampled: 1336000
    num_agent_steps_trained: 1336000
    num_env_steps_sampled: 1336000
    num_env_steps_trained: 1336000
  custom_metrics: {}
  date: 2022-07-23_22-48-03
  done: false
  episode_len_mean: 921.15
  episode_media: {}
  episode_reward_max: 226.2084712933431
  episode_reward_mean: 128.9012002029949
  episode_reward_min: -38.89360582998917
  episodes_this_iter: 5
  episodes_total: 2312
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9692621827125549
          entropy_coeff: 0.0
          kl: 0.008376975543797016
          model: {}
          policy_loss: -0.0021715599577873945
          total_loss: 4.3510918617248535
          vf_explained_var: 0.43633955717

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,334,5901.33,1336000,128.901,226.208,-38.8936,921.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,334,5901.33,1336000,128.901,226.208,-38.8936,921.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,334,5901.33,1336000,128.901,226.208,-38.8936,921.15


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1340000
  counters:
    num_agent_steps_sampled: 1340000
    num_agent_steps_trained: 1340000
    num_env_steps_sampled: 1340000
    num_env_steps_trained: 1340000
  custom_metrics: {}
  date: 2022-07-23_22-48-22
  done: false
  episode_len_mean: 929.74
  episode_media: {}
  episode_reward_max: 226.2084712933431
  episode_reward_mean: 129.8493091928653
  episode_reward_min: -38.89360582998917
  episodes_this_iter: 4
  episodes_total: 2316
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 1.0329973697662354
          entropy_coeff: 0.0
          kl: 0.011080954223871231
          model: {}
          policy_loss: -0.010115304961800575
          total_loss: 3.1156413555145264
          vf_explained_var: 0.410946756601

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,335,5919.95,1340000,129.849,226.208,-38.8936,929.74


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,335,5919.95,1340000,129.849,226.208,-38.8936,929.74


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,335,5919.95,1340000,129.849,226.208,-38.8936,929.74


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1344000
  counters:
    num_agent_steps_sampled: 1344000
    num_agent_steps_trained: 1344000
    num_env_steps_sampled: 1344000
    num_env_steps_trained: 1344000
  custom_metrics: {}
  date: 2022-07-23_22-48-40
  done: false
  episode_len_mean: 914.6
  episode_media: {}
  episode_reward_max: 226.2084712933431
  episode_reward_mean: 127.39320458614883
  episode_reward_min: -38.89360582998917
  episodes_this_iter: 5
  episodes_total: 2321
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9682736396789551
          entropy_coeff: 0.0
          kl: 0.005552539601922035
          model: {}
          policy_loss: -0.0031899153254926205
          total_loss: 3.713179111480713
          vf_explained_var: 0.403838992118

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,336,5938.15,1344000,127.393,226.208,-38.8936,914.6


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,336,5938.15,1344000,127.393,226.208,-38.8936,914.6


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,336,5938.15,1344000,127.393,226.208,-38.8936,914.6


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1348000
  counters:
    num_agent_steps_sampled: 1348000
    num_agent_steps_trained: 1348000
    num_env_steps_sampled: 1348000
    num_env_steps_trained: 1348000
  custom_metrics: {}
  date: 2022-07-23_22-48-58
  done: false
  episode_len_mean: 899.64
  episode_media: {}
  episode_reward_max: 226.2084712933431
  episode_reward_mean: 125.3081945383548
  episode_reward_min: -38.89360582998917
  episodes_this_iter: 6
  episodes_total: 2327
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9350902438163757
          entropy_coeff: 0.0
          kl: 0.0035630029160529375
          model: {}
          policy_loss: -0.0015668084379285574
          total_loss: 4.382806301116943
          vf_explained_var: 0.37526139616

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,337,5956,1348000,125.308,226.208,-38.8936,899.64


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,337,5956,1348000,125.308,226.208,-38.8936,899.64


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,337,5956,1348000,125.308,226.208,-38.8936,899.64


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1352000
  counters:
    num_agent_steps_sampled: 1352000
    num_agent_steps_trained: 1352000
    num_env_steps_sampled: 1352000
    num_env_steps_trained: 1352000
  custom_metrics: {}
  date: 2022-07-23_22-49-16
  done: false
  episode_len_mean: 895.71
  episode_media: {}
  episode_reward_max: 204.09820313956698
  episode_reward_mean: 123.10499230946375
  episode_reward_min: -38.89360582998917
  episodes_this_iter: 5
  episodes_total: 2332
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9912014603614807
          entropy_coeff: 0.0
          kl: 0.002100470010191202
          model: {}
          policy_loss: -0.0042731547728180885
          total_loss: 3.5196423530578613
          vf_explained_var: 0.375687241

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,338,5974.31,1352000,123.105,204.098,-38.8936,895.71


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,338,5974.31,1352000,123.105,204.098,-38.8936,895.71


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,338,5974.31,1352000,123.105,204.098,-38.8936,895.71


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1356000
  counters:
    num_agent_steps_sampled: 1356000
    num_agent_steps_trained: 1356000
    num_env_steps_sampled: 1356000
    num_env_steps_trained: 1356000
  custom_metrics: {}
  date: 2022-07-23_22-49-34
  done: false
  episode_len_mean: 888.38
  episode_media: {}
  episode_reward_max: 204.09820313956698
  episode_reward_mean: 122.78080842057047
  episode_reward_min: -38.89360582998917
  episodes_this_iter: 6
  episodes_total: 2338
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9324033260345459
          entropy_coeff: 0.0
          kl: 0.0020351610146462917
          model: {}
          policy_loss: -0.0027663891669362783
          total_loss: 3.3723785877227783
          vf_explained_var: 0.24631562

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,339,5992.28,1356000,122.781,204.098,-38.8936,888.38


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,339,5992.28,1356000,122.781,204.098,-38.8936,888.38


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,339,5992.28,1356000,122.781,204.098,-38.8936,888.38


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1360000
  counters:
    num_agent_steps_sampled: 1360000
    num_agent_steps_trained: 1360000
    num_env_steps_sampled: 1360000
    num_env_steps_trained: 1360000
  custom_metrics: {}
  date: 2022-07-23_22-49-54
  done: false
  episode_len_mean: 888.38
  episode_media: {}
  episode_reward_max: 204.09820313956698
  episode_reward_mean: 123.81148361319089
  episode_reward_min: -38.89360582998917
  episodes_this_iter: 4
  episodes_total: 2342
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9713783264160156
          entropy_coeff: 0.0
          kl: 0.0024262031074613333
          model: {}
          policy_loss: -0.0071931323036551476
          total_loss: 2.9273521900177
          vf_explained_var: 0.29725071787

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,340,6011.84,1360000,123.811,204.098,-38.8936,888.38


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,340,6011.84,1360000,123.811,204.098,-38.8936,888.38


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,340,6011.84,1360000,123.811,204.098,-38.8936,888.38


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1364000
  counters:
    num_agent_steps_sampled: 1364000
    num_agent_steps_trained: 1364000
    num_env_steps_sampled: 1364000
    num_env_steps_trained: 1364000
  custom_metrics: {}
  date: 2022-07-23_22-50-13
  done: false
  episode_len_mean: 888.38
  episode_media: {}
  episode_reward_max: 204.09820313956698
  episode_reward_mean: 124.31766139905882
  episode_reward_min: -38.89360582998917
  episodes_this_iter: 4
  episodes_total: 2346
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9487783908843994
          entropy_coeff: 0.0
          kl: 0.0074158827774226665
          model: {}
          policy_loss: -0.00720990402624011
          total_loss: 2.793256998062134
          vf_explained_var: 0.29564976692

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,341,6030.66,1364000,124.318,204.098,-38.8936,888.38


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,341,6030.66,1364000,124.318,204.098,-38.8936,888.38


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,341,6030.66,1364000,124.318,204.098,-38.8936,888.38


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1368000
  counters:
    num_agent_steps_sampled: 1368000
    num_agent_steps_trained: 1368000
    num_env_steps_sampled: 1368000
    num_env_steps_trained: 1368000
  custom_metrics: {}
  date: 2022-07-23_22-50-32
  done: false
  episode_len_mean: 895.62
  episode_media: {}
  episode_reward_max: 204.09820313956698
  episode_reward_mean: 124.72964355602473
  episode_reward_min: -38.89360582998917
  episodes_this_iter: 4
  episodes_total: 2350
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9709498882293701
          entropy_coeff: 0.0
          kl: 0.001445863046683371
          model: {}
          policy_loss: -0.011669989675283432
          total_loss: 3.0877785682678223
          vf_explained_var: 0.3089120388

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,342,6049.98,1368000,124.73,204.098,-38.8936,895.62


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,342,6049.98,1368000,124.73,204.098,-38.8936,895.62


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,342,6049.98,1368000,124.73,204.098,-38.8936,895.62


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1372000
  counters:
    num_agent_steps_sampled: 1372000
    num_agent_steps_trained: 1372000
    num_env_steps_sampled: 1372000
    num_env_steps_trained: 1372000
  custom_metrics: {}
  date: 2022-07-23_22-50-51
  done: false
  episode_len_mean: 903.01
  episode_media: {}
  episode_reward_max: 204.09820313956698
  episode_reward_mean: 124.90967795229786
  episode_reward_min: -38.89360582998917
  episodes_this_iter: 4
  episodes_total: 2354
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8755160570144653
          entropy_coeff: 0.0
          kl: 0.011654756963253021
          model: {}
          policy_loss: -0.008620867505669594
          total_loss: 2.8074982166290283
          vf_explained_var: 0.3198232352

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,343,6068.89,1372000,124.91,204.098,-38.8936,903.01


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,343,6068.89,1372000,124.91,204.098,-38.8936,903.01


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,343,6068.89,1372000,124.91,204.098,-38.8936,903.01


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1376000
  counters:
    num_agent_steps_sampled: 1376000
    num_agent_steps_trained: 1376000
    num_env_steps_sampled: 1376000
    num_env_steps_trained: 1376000
  custom_metrics: {}
  date: 2022-07-23_22-51-11
  done: false
  episode_len_mean: 892.63
  episode_media: {}
  episode_reward_max: 225.84456745560192
  episode_reward_mean: 127.0099541289987
  episode_reward_min: -38.89360582998917
  episodes_this_iter: 5
  episodes_total: 2359
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9020411372184753
          entropy_coeff: 0.0
          kl: 0.006666648667305708
          model: {}
          policy_loss: -0.0031525292433798313
          total_loss: 4.60446310043335
          vf_explained_var: 0.349287986755

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,344,6088.6,1376000,127.01,225.845,-38.8936,892.63


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,344,6088.6,1376000,127.01,225.845,-38.8936,892.63


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,344,6088.6,1376000,127.01,225.845,-38.8936,892.63


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1380000
  counters:
    num_agent_steps_sampled: 1380000
    num_agent_steps_trained: 1380000
    num_env_steps_sampled: 1380000
    num_env_steps_trained: 1380000
  custom_metrics: {}
  date: 2022-07-23_22-51-31
  done: false
  episode_len_mean: 885.58
  episode_media: {}
  episode_reward_max: 225.84456745560192
  episode_reward_mean: 125.61895176213694
  episode_reward_min: -38.89360582998917
  episodes_this_iter: 4
  episodes_total: 2363
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9684855341911316
          entropy_coeff: 0.0
          kl: 0.0009664887911640108
          model: {}
          policy_loss: -0.0020384753588587046
          total_loss: 3.439706802368164
          vf_explained_var: 0.363253831

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,345,6108.17,1380000,125.619,225.845,-38.8936,885.58


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,345,6108.17,1380000,125.619,225.845,-38.8936,885.58


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,345,6108.17,1380000,125.619,225.845,-38.8936,885.58


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1384000
  counters:
    num_agent_steps_sampled: 1384000
    num_agent_steps_trained: 1384000
    num_env_steps_sampled: 1384000
    num_env_steps_trained: 1384000
  custom_metrics: {}
  date: 2022-07-23_22-51-49
  done: false
  episode_len_mean: 871.31
  episode_media: {}
  episode_reward_max: 225.84456745560192
  episode_reward_mean: 123.06441494144323
  episode_reward_min: -38.89360582998917
  episodes_this_iter: 5
  episodes_total: 2368
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9782674312591553
          entropy_coeff: 0.0
          kl: 0.0025911135599017143
          model: {}
          policy_loss: -0.0016394505510106683
          total_loss: 5.244227886199951
          vf_explained_var: 0.410620480

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,346,6126.47,1384000,123.064,225.845,-38.8936,871.31


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,346,6126.47,1384000,123.064,225.845,-38.8936,871.31


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,346,6126.47,1384000,123.064,225.845,-38.8936,871.31


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1388000
  counters:
    num_agent_steps_sampled: 1388000
    num_agent_steps_trained: 1388000
    num_env_steps_sampled: 1388000
    num_env_steps_trained: 1388000
  custom_metrics: {}
  date: 2022-07-23_22-52-07
  done: false
  episode_len_mean: 867.01
  episode_media: {}
  episode_reward_max: 239.89657190885512
  episode_reward_mean: 125.9355603679645
  episode_reward_min: -38.89360582998917
  episodes_this_iter: 6
  episodes_total: 2374
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8563930988311768
          entropy_coeff: 0.0
          kl: 0.004680955316871405
          model: {}
          policy_loss: -0.006175442133098841
          total_loss: 4.4925665855407715
          vf_explained_var: 0.40432596206

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,347,6144.69,1388000,125.936,239.897,-38.8936,867.01


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,347,6144.69,1388000,125.936,239.897,-38.8936,867.01


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,347,6144.69,1388000,125.936,239.897,-38.8936,867.01


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1392000
  counters:
    num_agent_steps_sampled: 1392000
    num_agent_steps_trained: 1392000
    num_env_steps_sampled: 1392000
    num_env_steps_trained: 1392000
  custom_metrics: {}
  date: 2022-07-23_22-52-26
  done: false
  episode_len_mean: 859.81
  episode_media: {}
  episode_reward_max: 239.89657190885512
  episode_reward_mean: 124.50098828738157
  episode_reward_min: -38.89360582998917
  episodes_this_iter: 5
  episodes_total: 2379
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9022487998008728
          entropy_coeff: 0.0
          kl: 0.006455558817833662
          model: {}
          policy_loss: -0.005930474027991295
          total_loss: 4.037818908691406
          vf_explained_var: 0.31487834453

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,348,6163.08,1392000,124.501,239.897,-38.8936,859.81


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,348,6163.08,1392000,124.501,239.897,-38.8936,859.81


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,348,6163.08,1392000,124.501,239.897,-38.8936,859.81


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1396000
  counters:
    num_agent_steps_sampled: 1396000
    num_agent_steps_trained: 1396000
    num_env_steps_sampled: 1396000
    num_env_steps_trained: 1396000
  custom_metrics: {}
  date: 2022-07-23_22-52-44
  done: false
  episode_len_mean: 845.9
  episode_media: {}
  episode_reward_max: 261.43351968537337
  episode_reward_mean: 126.60110140112374
  episode_reward_min: -38.89360582998917
  episodes_this_iter: 5
  episodes_total: 2384
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8649225831031799
          entropy_coeff: 0.0
          kl: 0.003264789469540119
          model: {}
          policy_loss: -0.004320909734815359
          total_loss: 4.020447254180908
          vf_explained_var: 0.392093688249

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,349,6180.8,1396000,126.601,261.434,-38.8936,845.9


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,349,6180.8,1396000,126.601,261.434,-38.8936,845.9


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,349,6180.8,1396000,126.601,261.434,-38.8936,845.9


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1400000
  counters:
    num_agent_steps_sampled: 1400000
    num_agent_steps_trained: 1400000
    num_env_steps_sampled: 1400000
    num_env_steps_trained: 1400000
  custom_metrics: {}
  date: 2022-07-23_22-53-03
  done: false
  episode_len_mean: 841.94
  episode_media: {}
  episode_reward_max: 261.43351968537337
  episode_reward_mean: 127.77557342021713
  episode_reward_min: -38.89360582998917
  episodes_this_iter: 4
  episodes_total: 2388
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8651810884475708
          entropy_coeff: 0.0
          kl: 0.011271439492702484
          model: {}
          policy_loss: -0.004540806636214256
          total_loss: 2.8753702640533447
          vf_explained_var: 0.3473851382

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,350,6199.85,1400000,127.776,261.434,-38.8936,841.94


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,350,6199.85,1400000,127.776,261.434,-38.8936,841.94


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,350,6199.85,1400000,127.776,261.434,-38.8936,841.94


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1404000
  counters:
    num_agent_steps_sampled: 1404000
    num_agent_steps_trained: 1404000
    num_env_steps_sampled: 1404000
    num_env_steps_trained: 1404000
  custom_metrics: {}
  date: 2022-07-23_22-53-22
  done: false
  episode_len_mean: 849.48
  episode_media: {}
  episode_reward_max: 261.43351968537337
  episode_reward_mean: 128.06272297081875
  episode_reward_min: -38.89360582998917
  episodes_this_iter: 4
  episodes_total: 2392
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9304860830307007
          entropy_coeff: 0.0
          kl: 0.008567022159695625
          model: {}
          policy_loss: -0.00859635416418314
          total_loss: 2.8844945430755615
          vf_explained_var: 0.42326945066

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,351,6219.25,1404000,128.063,261.434,-38.8936,849.48


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,351,6219.25,1404000,128.063,261.434,-38.8936,849.48


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,351,6219.25,1404000,128.063,261.434,-38.8936,849.48


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1408000
  counters:
    num_agent_steps_sampled: 1408000
    num_agent_steps_trained: 1408000
    num_env_steps_sampled: 1408000
    num_env_steps_trained: 1408000
  custom_metrics: {}
  date: 2022-07-23_22-53-40
  done: false
  episode_len_mean: 841.91
  episode_media: {}
  episode_reward_max: 261.43351968537337
  episode_reward_mean: 126.5946485713372
  episode_reward_min: -38.89360582998917
  episodes_this_iter: 4
  episodes_total: 2396
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8707584738731384
          entropy_coeff: 0.0
          kl: 0.004584148526191711
          model: {}
          policy_loss: -0.0031741156708449125
          total_loss: 3.3241543769836426
          vf_explained_var: 0.3417146503

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,352,6237.1,1408000,126.595,261.434,-38.8936,841.91


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,352,6237.1,1408000,126.595,261.434,-38.8936,841.91


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,352,6237.1,1408000,126.595,261.434,-38.8936,841.91


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1412000
  counters:
    num_agent_steps_sampled: 1412000
    num_agent_steps_trained: 1412000
    num_env_steps_sampled: 1412000
    num_env_steps_trained: 1412000
  custom_metrics: {}
  date: 2022-07-23_22-53-58
  done: false
  episode_len_mean: 841.75
  episode_media: {}
  episode_reward_max: 261.43351968537337
  episode_reward_mean: 127.45365715089285
  episode_reward_min: -21.61196886116157
  episodes_this_iter: 5
  episodes_total: 2401
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9106345772743225
          entropy_coeff: 0.0
          kl: 0.004348010290414095
          model: {}
          policy_loss: -0.006676936522126198
          total_loss: 3.780073881149292
          vf_explained_var: 0.22447185218

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,353,6255.09,1412000,127.454,261.434,-21.612,841.75


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,353,6255.09,1412000,127.454,261.434,-21.612,841.75


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,353,6255.09,1412000,127.454,261.434,-21.612,841.75


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1416000
  counters:
    num_agent_steps_sampled: 1416000
    num_agent_steps_trained: 1416000
    num_env_steps_sampled: 1416000
    num_env_steps_trained: 1416000
  custom_metrics: {}
  date: 2022-07-23_22-54-17
  done: false
  episode_len_mean: 834.77
  episode_media: {}
  episode_reward_max: 261.43351968537337
  episode_reward_mean: 128.06656445482147
  episode_reward_min: -21.61196886116157
  episodes_this_iter: 6
  episodes_total: 2407
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8949204683303833
          entropy_coeff: 0.0
          kl: 0.002386404201388359
          model: {}
          policy_loss: -0.003984977025538683
          total_loss: 3.9289426803588867
          vf_explained_var: 0.2430999130

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,354,6273.48,1416000,128.067,261.434,-21.612,834.77


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,354,6273.48,1416000,128.067,261.434,-21.612,834.77


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,354,6273.48,1416000,128.067,261.434,-21.612,834.77


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1420000
  counters:
    num_agent_steps_sampled: 1420000
    num_agent_steps_trained: 1420000
    num_env_steps_sampled: 1420000
    num_env_steps_trained: 1420000
  custom_metrics: {}
  date: 2022-07-23_22-54-34
  done: false
  episode_len_mean: 827.99
  episode_media: {}
  episode_reward_max: 261.43351968537337
  episode_reward_mean: 126.7307986722919
  episode_reward_min: -21.61196886116157
  episodes_this_iter: 6
  episodes_total: 2413
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9132259488105774
          entropy_coeff: 0.0
          kl: 0.0076532685197889805
          model: {}
          policy_loss: -0.006079334765672684
          total_loss: 3.7938334941864014
          vf_explained_var: 0.3308619856

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,355,6290.59,1420000,126.731,261.434,-21.612,827.99


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,355,6290.59,1420000,126.731,261.434,-21.612,827.99


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,355,6290.59,1420000,126.731,261.434,-21.612,827.99


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1424000
  counters:
    num_agent_steps_sampled: 1424000
    num_agent_steps_trained: 1424000
    num_env_steps_sampled: 1424000
    num_env_steps_trained: 1424000
  custom_metrics: {}
  date: 2022-07-23_22-54-53
  done: false
  episode_len_mean: 821.22
  episode_media: {}
  episode_reward_max: 261.43351968537337
  episode_reward_mean: 126.91212605278902
  episode_reward_min: -21.61196886116157
  episodes_this_iter: 4
  episodes_total: 2417
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.865337610244751
          entropy_coeff: 0.0
          kl: 0.012020284309983253
          model: {}
          policy_loss: -0.002911259653046727
          total_loss: 3.639265775680542
          vf_explained_var: 0.284164637327

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,356,6309.62,1424000,126.912,261.434,-21.612,821.22


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,356,6309.62,1424000,126.912,261.434,-21.612,821.22


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,356,6309.62,1424000,126.912,261.434,-21.612,821.22


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1428000
  counters:
    num_agent_steps_sampled: 1428000
    num_agent_steps_trained: 1428000
    num_env_steps_sampled: 1428000
    num_env_steps_trained: 1428000
  custom_metrics: {}
  date: 2022-07-23_22-55-11
  done: false
  episode_len_mean: 829.96
  episode_media: {}
  episode_reward_max: 261.43351968537337
  episode_reward_mean: 129.64851405476742
  episode_reward_min: -10.392714461923717
  episodes_this_iter: 4
  episodes_total: 2421
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.921715259552002
          entropy_coeff: 0.0
          kl: 0.010784612037241459
          model: {}
          policy_loss: -0.0029536678921431303
          total_loss: 3.771491527557373
          vf_explained_var: 0.2030279040

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,357,6327.86,1428000,129.649,261.434,-10.3927,829.96


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,357,6327.86,1428000,129.649,261.434,-10.3927,829.96


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,357,6327.86,1428000,129.649,261.434,-10.3927,829.96


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1432000
  counters:
    num_agent_steps_sampled: 1432000
    num_agent_steps_trained: 1432000
    num_env_steps_sampled: 1432000
    num_env_steps_trained: 1432000
  custom_metrics: {}
  date: 2022-07-23_22-55-29
  done: false
  episode_len_mean: 844.34
  episode_media: {}
  episode_reward_max: 261.43351968537337
  episode_reward_mean: 133.27855087717458
  episode_reward_min: -10.392714461923717
  episodes_this_iter: 5
  episodes_total: 2426
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8962976336479187
          entropy_coeff: 0.0
          kl: 0.007723847404122353
          model: {}
          policy_loss: -0.0024318997748196125
          total_loss: 3.797023057937622
          vf_explained_var: 0.283530265

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,358,6346.14,1432000,133.279,261.434,-10.3927,844.34


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,358,6346.14,1432000,133.279,261.434,-10.3927,844.34


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,358,6346.14,1432000,133.279,261.434,-10.3927,844.34


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1436000
  counters:
    num_agent_steps_sampled: 1436000
    num_agent_steps_trained: 1436000
    num_env_steps_sampled: 1436000
    num_env_steps_trained: 1436000
  custom_metrics: {}
  date: 2022-07-23_22-55-48
  done: false
  episode_len_mean: 844.9
  episode_media: {}
  episode_reward_max: 261.43351968537337
  episode_reward_mean: 132.39328912855484
  episode_reward_min: -10.392714461923717
  episodes_this_iter: 4
  episodes_total: 2430
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9633994102478027
          entropy_coeff: 0.0
          kl: 0.017069898545742035
          model: {}
          policy_loss: -0.004217182286083698
          total_loss: 4.580893039703369
          vf_explained_var: 0.53307056427

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,359,6364.74,1436000,132.393,261.434,-10.3927,844.9


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,359,6364.74,1436000,132.393,261.434,-10.3927,844.9


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,359,6364.74,1436000,132.393,261.434,-10.3927,844.9


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1440000
  counters:
    num_agent_steps_sampled: 1440000
    num_agent_steps_trained: 1440000
    num_env_steps_sampled: 1440000
    num_env_steps_trained: 1440000
  custom_metrics: {}
  date: 2022-07-23_22-56-06
  done: false
  episode_len_mean: 828.94
  episode_media: {}
  episode_reward_max: 261.43351968537337
  episode_reward_mean: 133.66793543398745
  episode_reward_min: -53.522718499385675
  episodes_this_iter: 9
  episodes_total: 2439
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7734056115150452
          entropy_coeff: 0.0
          kl: 0.002808776218444109
          model: {}
          policy_loss: 0.0002640467428136617
          total_loss: 6.486286163330078
          vf_explained_var: 0.2388889491

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,360,6382.98,1440000,133.668,261.434,-53.5227,828.94


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,360,6382.98,1440000,133.668,261.434,-53.5227,828.94


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,360,6382.98,1440000,133.668,261.434,-53.5227,828.94


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1444000
  counters:
    num_agent_steps_sampled: 1444000
    num_agent_steps_trained: 1444000
    num_env_steps_sampled: 1444000
    num_env_steps_trained: 1444000
  custom_metrics: {}
  date: 2022-07-23_22-56-25
  done: false
  episode_len_mean: 815.62
  episode_media: {}
  episode_reward_max: 261.43351968537337
  episode_reward_mean: 130.43126872372162
  episode_reward_min: -53.522718499385675
  episodes_this_iter: 5
  episodes_total: 2444
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8797683715820312
          entropy_coeff: 0.0
          kl: 0.0010384165216237307
          model: {}
          policy_loss: -0.0028013032861053944
          total_loss: 4.981503963470459
          vf_explained_var: 0.35334464

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,361,6401.58,1444000,130.431,261.434,-53.5227,815.62


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,361,6401.58,1444000,130.431,261.434,-53.5227,815.62


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,361,6401.58,1444000,130.431,261.434,-53.5227,815.62


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1448000
  counters:
    num_agent_steps_sampled: 1448000
    num_agent_steps_trained: 1448000
    num_env_steps_sampled: 1448000
    num_env_steps_trained: 1448000
  custom_metrics: {}
  date: 2022-07-23_22-56-45
  done: false
  episode_len_mean: 815.62
  episode_media: {}
  episode_reward_max: 261.43351968537337
  episode_reward_mean: 130.2043180582789
  episode_reward_min: -53.522718499385675
  episodes_this_iter: 4
  episodes_total: 2448
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 1.0127023458480835
          entropy_coeff: 0.0
          kl: 0.003165470901876688
          model: {}
          policy_loss: -0.002077922224998474
          total_loss: 3.538898229598999
          vf_explained_var: 0.42378553748

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,362,6421.69,1448000,130.204,261.434,-53.5227,815.62


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,362,6421.69,1448000,130.204,261.434,-53.5227,815.62


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,362,6421.69,1448000,130.204,261.434,-53.5227,815.62


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1452000
  counters:
    num_agent_steps_sampled: 1452000
    num_agent_steps_trained: 1452000
    num_env_steps_sampled: 1452000
    num_env_steps_trained: 1452000
  custom_metrics: {}
  date: 2022-07-23_22-57-05
  done: false
  episode_len_mean: 815.62
  episode_media: {}
  episode_reward_max: 261.43351968537337
  episode_reward_mean: 129.77633838921395
  episode_reward_min: -53.522718499385675
  episodes_this_iter: 4
  episodes_total: 2452
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 1.0308401584625244
          entropy_coeff: 0.0
          kl: 0.006310691591352224
          model: {}
          policy_loss: -0.007856846787035465
          total_loss: 2.8526132106781006
          vf_explained_var: 0.301420360

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,363,6441.29,1452000,129.776,261.434,-53.5227,815.62


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,363,6441.29,1452000,129.776,261.434,-53.5227,815.62


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,363,6441.29,1452000,129.776,261.434,-53.5227,815.62


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1456000
  counters:
    num_agent_steps_sampled: 1456000
    num_agent_steps_trained: 1456000
    num_env_steps_sampled: 1456000
    num_env_steps_trained: 1456000
  custom_metrics: {}
  date: 2022-07-23_22-57-24
  done: false
  episode_len_mean: 815.62
  episode_media: {}
  episode_reward_max: 261.43351968537337
  episode_reward_mean: 129.97925704516874
  episode_reward_min: -53.522718499385675
  episodes_this_iter: 4
  episodes_total: 2456
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 1.0083914995193481
          entropy_coeff: 0.0
          kl: 0.0007974540931172669
          model: {}
          policy_loss: -0.00030912584043107927
          total_loss: 3.1111724376678467
          vf_explained_var: 0.337159

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,364,6460.42,1456000,129.979,261.434,-53.5227,815.62


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,364,6460.42,1456000,129.979,261.434,-53.5227,815.62


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,364,6460.42,1456000,129.979,261.434,-53.5227,815.62


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1460000
  counters:
    num_agent_steps_sampled: 1460000
    num_agent_steps_trained: 1460000
    num_env_steps_sampled: 1460000
    num_env_steps_trained: 1460000
  custom_metrics: {}
  date: 2022-07-23_22-57-42
  done: false
  episode_len_mean: 822.32
  episode_media: {}
  episode_reward_max: 261.43351968537337
  episode_reward_mean: 129.71851102129165
  episode_reward_min: -53.522718499385675
  episodes_this_iter: 4
  episodes_total: 2460
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 1.0005868673324585
          entropy_coeff: 0.0
          kl: 0.013311152346432209
          model: {}
          policy_loss: -0.006605237722396851
          total_loss: 3.742506265640259
          vf_explained_var: 0.3140694797

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,365,6478.23,1460000,129.719,261.434,-53.5227,822.32


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,365,6478.23,1460000,129.719,261.434,-53.5227,822.32


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,365,6478.23,1460000,129.719,261.434,-53.5227,822.32


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1464000
  counters:
    num_agent_steps_sampled: 1464000
    num_agent_steps_trained: 1464000
    num_env_steps_sampled: 1464000
    num_env_steps_trained: 1464000
  custom_metrics: {}
  date: 2022-07-23_22-58-00
  done: false
  episode_len_mean: 829.37
  episode_media: {}
  episode_reward_max: 261.43351968537337
  episode_reward_mean: 130.82461472201751
  episode_reward_min: -53.522718499385675
  episodes_this_iter: 4
  episodes_total: 2464
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 1.0216221809387207
          entropy_coeff: 0.0
          kl: 0.004664363339543343
          model: {}
          policy_loss: -0.002790712984278798
          total_loss: 3.4972262382507324
          vf_explained_var: 0.271849036

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,366,6495.83,1464000,130.825,261.434,-53.5227,829.37


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,366,6495.83,1464000,130.825,261.434,-53.5227,829.37


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,366,6495.83,1464000,130.825,261.434,-53.5227,829.37


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1468000
  counters:
    num_agent_steps_sampled: 1468000
    num_agent_steps_trained: 1468000
    num_env_steps_sampled: 1468000
    num_env_steps_trained: 1468000
  custom_metrics: {}
  date: 2022-07-23_22-58-18
  done: false
  episode_len_mean: 843.64
  episode_media: {}
  episode_reward_max: 261.43351968537337
  episode_reward_mean: 132.3392307516057
  episode_reward_min: -53.522718499385675
  episodes_this_iter: 4
  episodes_total: 2468
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9984272718429565
          entropy_coeff: 0.0
          kl: 0.0036772964522242546
          model: {}
          policy_loss: -0.005093531683087349
          total_loss: 3.160788059234619
          vf_explained_var: 0.2938686609

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,367,6513.95,1468000,132.339,261.434,-53.5227,843.64


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,367,6513.95,1468000,132.339,261.434,-53.5227,843.64


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,367,6513.95,1468000,132.339,261.434,-53.5227,843.64


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1472000
  counters:
    num_agent_steps_sampled: 1472000
    num_agent_steps_trained: 1472000
    num_env_steps_sampled: 1472000
    num_env_steps_trained: 1472000
  custom_metrics: {}
  date: 2022-07-23_22-58-36
  done: false
  episode_len_mean: 842.7
  episode_media: {}
  episode_reward_max: 261.43351968537337
  episode_reward_mean: 130.95195613205334
  episode_reward_min: -53.522718499385675
  episodes_this_iter: 5
  episodes_total: 2473
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 1.0134352445602417
          entropy_coeff: 0.0
          kl: 0.001652386737987399
          model: {}
          policy_loss: -0.007949016988277435
          total_loss: 4.315751075744629
          vf_explained_var: 0.42990806698

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,368,6531.94,1472000,130.952,261.434,-53.5227,842.7


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,368,6531.94,1472000,130.952,261.434,-53.5227,842.7


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,368,6531.94,1472000,130.952,261.434,-53.5227,842.7


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1476000
  counters:
    num_agent_steps_sampled: 1476000
    num_agent_steps_trained: 1476000
    num_env_steps_sampled: 1476000
    num_env_steps_trained: 1476000
  custom_metrics: {}
  date: 2022-07-23_22-58-54
  done: false
  episode_len_mean: 856.0
  episode_media: {}
  episode_reward_max: 261.43351968537337
  episode_reward_mean: 131.32786489835377
  episode_reward_min: -53.522718499385675
  episodes_this_iter: 4
  episodes_total: 2477
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.998399555683136
          entropy_coeff: 0.0
          kl: 0.004520114976912737
          model: {}
          policy_loss: -0.0005662202602252364
          total_loss: 2.9673380851745605
          vf_explained_var: 0.3695369064

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,369,6550.08,1476000,131.328,261.434,-53.5227,856


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,369,6550.08,1476000,131.328,261.434,-53.5227,856


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,369,6550.08,1476000,131.328,261.434,-53.5227,856


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1480000
  counters:
    num_agent_steps_sampled: 1480000
    num_agent_steps_trained: 1480000
    num_env_steps_sampled: 1480000
    num_env_steps_trained: 1480000
  custom_metrics: {}
  date: 2022-07-23_22-59-11
  done: false
  episode_len_mean: 856.0
  episode_media: {}
  episode_reward_max: 261.43351968537337
  episode_reward_mean: 131.75244004268717
  episode_reward_min: -53.522718499385675
  episodes_this_iter: 4
  episodes_total: 2481
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 1.0190942287445068
          entropy_coeff: 0.0
          kl: 0.014360317960381508
          model: {}
          policy_loss: -0.005433949176222086
          total_loss: 2.9020586013793945
          vf_explained_var: 0.3285948634

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,370,6567.44,1480000,131.752,261.434,-53.5227,856


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,370,6567.44,1480000,131.752,261.434,-53.5227,856


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,370,6567.44,1480000,131.752,261.434,-53.5227,856


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1484000
  counters:
    num_agent_steps_sampled: 1484000
    num_agent_steps_trained: 1484000
    num_env_steps_sampled: 1484000
    num_env_steps_trained: 1484000
  custom_metrics: {}
  date: 2022-07-23_22-59-29
  done: false
  episode_len_mean: 856.91
  episode_media: {}
  episode_reward_max: 258.0231317861502
  episode_reward_mean: 129.46646094093543
  episode_reward_min: -53.522718499385675
  episodes_this_iter: 5
  episodes_total: 2486
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9978590607643127
          entropy_coeff: 0.0
          kl: 0.004298931919038296
          model: {}
          policy_loss: -0.002187663223594427
          total_loss: 5.351438522338867
          vf_explained_var: 0.22433833777

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,371,6585.01,1484000,129.466,258.023,-53.5227,856.91


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,371,6585.01,1484000,129.466,258.023,-53.5227,856.91


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,371,6585.01,1484000,129.466,258.023,-53.5227,856.91


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1488000
  counters:
    num_agent_steps_sampled: 1488000
    num_agent_steps_trained: 1488000
    num_env_steps_sampled: 1488000
    num_env_steps_trained: 1488000
  custom_metrics: {}
  date: 2022-07-23_22-59-48
  done: false
  episode_len_mean: 860.87
  episode_media: {}
  episode_reward_max: 249.30009988006447
  episode_reward_mean: 128.09243988564825
  episode_reward_min: -53.522718499385675
  episodes_this_iter: 4
  episodes_total: 2490
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 1.0024150609970093
          entropy_coeff: 0.0
          kl: 0.020331664010882378
          model: {}
          policy_loss: -0.009311835281550884
          total_loss: 3.1299352645874023
          vf_explained_var: 0.476467847

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,372,6603.84,1488000,128.092,249.3,-53.5227,860.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,372,6603.84,1488000,128.092,249.3,-53.5227,860.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,372,6603.84,1488000,128.092,249.3,-53.5227,860.87


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1492000
  counters:
    num_agent_steps_sampled: 1492000
    num_agent_steps_trained: 1492000
    num_env_steps_sampled: 1492000
    num_env_steps_trained: 1492000
  custom_metrics: {}
  date: 2022-07-23_23-00-06
  done: false
  episode_len_mean: 868.44
  episode_media: {}
  episode_reward_max: 249.30009988006447
  episode_reward_mean: 128.58764673131375
  episode_reward_min: -53.522718499385675
  episodes_this_iter: 4
  episodes_total: 2494
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 1.0090056657791138
          entropy_coeff: 0.0
          kl: 0.00197865953668952
          model: {}
          policy_loss: -0.007193172816187143
          total_loss: 2.681393623352051
          vf_explained_var: 0.36910238862

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,373,6622.1,1492000,128.588,249.3,-53.5227,868.44


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,373,6622.1,1492000,128.588,249.3,-53.5227,868.44


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,373,6622.1,1492000,128.588,249.3,-53.5227,868.44


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1496000
  counters:
    num_agent_steps_sampled: 1496000
    num_agent_steps_trained: 1496000
    num_env_steps_sampled: 1496000
    num_env_steps_trained: 1496000
  custom_metrics: {}
  date: 2022-07-23_23-00-25
  done: false
  episode_len_mean: 868.44
  episode_media: {}
  episode_reward_max: 249.30009988006447
  episode_reward_mean: 128.26793235794202
  episode_reward_min: -53.522718499385675
  episodes_this_iter: 4
  episodes_total: 2498
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 1.0148037672042847
          entropy_coeff: 0.0
          kl: 0.018408261239528656
          model: {}
          policy_loss: -0.007293186616152525
          total_loss: 3.0080766677856445
          vf_explained_var: 0.334461838

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,374,6640.28,1496000,128.268,249.3,-53.5227,868.44


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,374,6640.28,1496000,128.268,249.3,-53.5227,868.44


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,374,6640.28,1496000,128.268,249.3,-53.5227,868.44


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1500000
  counters:
    num_agent_steps_sampled: 1500000
    num_agent_steps_trained: 1500000
    num_env_steps_sampled: 1500000
    num_env_steps_trained: 1500000
  custom_metrics: {}
  date: 2022-07-23_23-00-42
  done: false
  episode_len_mean: 876.08
  episode_media: {}
  episode_reward_max: 249.35461121846959
  episode_reward_mean: 128.84092527976213
  episode_reward_min: -53.522718499385675
  episodes_this_iter: 5
  episodes_total: 2503
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9464083909988403
          entropy_coeff: 0.0
          kl: 0.004448905121535063
          model: {}
          policy_loss: -0.0056426809169352055
          total_loss: 3.189668655395508
          vf_explained_var: 0.410259038

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,375,6657.57,1500000,128.841,249.355,-53.5227,876.08


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,375,6657.57,1500000,128.841,249.355,-53.5227,876.08


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,375,6657.57,1500000,128.841,249.355,-53.5227,876.08


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1504000
  counters:
    num_agent_steps_sampled: 1504000
    num_agent_steps_trained: 1504000
    num_env_steps_sampled: 1504000
    num_env_steps_trained: 1504000
  custom_metrics: {}
  date: 2022-07-23_23-01-00
  done: false
  episode_len_mean: 883.83
  episode_media: {}
  episode_reward_max: 249.35461121846959
  episode_reward_mean: 129.82973407358222
  episode_reward_min: -53.522718499385675
  episodes_this_iter: 4
  episodes_total: 2507
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9791322946548462
          entropy_coeff: 0.0
          kl: 0.010515585541725159
          model: {}
          policy_loss: -0.004998861812055111
          total_loss: 2.5820517539978027
          vf_explained_var: 0.269107282

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,376,6675.78,1504000,129.83,249.355,-53.5227,883.83


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,376,6675.78,1504000,129.83,249.355,-53.5227,883.83


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,376,6675.78,1504000,129.83,249.355,-53.5227,883.83


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1508000
  counters:
    num_agent_steps_sampled: 1508000
    num_agent_steps_trained: 1508000
    num_env_steps_sampled: 1508000
    num_env_steps_trained: 1508000
  custom_metrics: {}
  date: 2022-07-23_23-01-17
  done: false
  episode_len_mean: 890.92
  episode_media: {}
  episode_reward_max: 249.35461121846959
  episode_reward_mean: 131.26870008759047
  episode_reward_min: -53.522718499385675
  episodes_this_iter: 4
  episodes_total: 2511
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9972357153892517
          entropy_coeff: 0.0
          kl: 0.005906482692807913
          model: {}
          policy_loss: -0.007301907055079937
          total_loss: 3.171251058578491
          vf_explained_var: 0.2829698026

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,377,6692.95,1508000,131.269,249.355,-53.5227,890.92


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,377,6692.95,1508000,131.269,249.355,-53.5227,890.92


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,377,6692.95,1508000,131.269,249.355,-53.5227,890.92


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1512000
  counters:
    num_agent_steps_sampled: 1512000
    num_agent_steps_trained: 1512000
    num_env_steps_sampled: 1512000
    num_env_steps_trained: 1512000
  custom_metrics: {}
  date: 2022-07-23_23-01-35
  done: false
  episode_len_mean: 898.23
  episode_media: {}
  episode_reward_max: 249.35461121846959
  episode_reward_mean: 132.62462134921864
  episode_reward_min: -53.522718499385675
  episodes_this_iter: 4
  episodes_total: 2515
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9628874659538269
          entropy_coeff: 0.0
          kl: 0.006089069880545139
          model: {}
          policy_loss: 0.0017116171075031161
          total_loss: 3.3042054176330566
          vf_explained_var: 0.313006639

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,378,6710.47,1512000,132.625,249.355,-53.5227,898.23


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,378,6710.47,1512000,132.625,249.355,-53.5227,898.23


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,378,6710.47,1512000,132.625,249.355,-53.5227,898.23


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1516000
  counters:
    num_agent_steps_sampled: 1516000
    num_agent_steps_trained: 1516000
    num_env_steps_sampled: 1516000
    num_env_steps_trained: 1516000
  custom_metrics: {}
  date: 2022-07-23_23-01-53
  done: false
  episode_len_mean: 911.4
  episode_media: {}
  episode_reward_max: 249.35461121846959
  episode_reward_mean: 131.8496266331354
  episode_reward_min: -53.522718499385675
  episodes_this_iter: 4
  episodes_total: 2519
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9151957631111145
          entropy_coeff: 0.0
          kl: 0.009213204495608807
          model: {}
          policy_loss: -0.00608624704182148
          total_loss: 3.682178258895874
          vf_explained_var: 0.3224666416645

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,379,6728.14,1516000,131.85,249.355,-53.5227,911.4


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,379,6728.14,1516000,131.85,249.355,-53.5227,911.4


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,379,6728.14,1516000,131.85,249.355,-53.5227,911.4


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1520000
  counters:
    num_agent_steps_sampled: 1520000
    num_agent_steps_trained: 1520000
    num_env_steps_sampled: 1520000
    num_env_steps_trained: 1520000
  custom_metrics: {}
  date: 2022-07-23_23-02-10
  done: false
  episode_len_mean: 911.4
  episode_media: {}
  episode_reward_max: 249.35461121846959
  episode_reward_mean: 132.38534308527176
  episode_reward_min: -53.522718499385675
  episodes_this_iter: 4
  episodes_total: 2523
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9745886325836182
          entropy_coeff: 0.0
          kl: 0.0012777028605341911
          model: {}
          policy_loss: -0.004965389613062143
          total_loss: 2.4563355445861816
          vf_explained_var: 0.275543659

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,380,6745.56,1520000,132.385,249.355,-53.5227,911.4


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,380,6745.56,1520000,132.385,249.355,-53.5227,911.4


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,380,6745.56,1520000,132.385,249.355,-53.5227,911.4


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1524000
  counters:
    num_agent_steps_sampled: 1524000
    num_agent_steps_trained: 1524000
    num_env_steps_sampled: 1524000
    num_env_steps_trained: 1524000
  custom_metrics: {}
  date: 2022-07-23_23-02-28
  done: false
  episode_len_mean: 910.46
  episode_media: {}
  episode_reward_max: 249.99157292304199
  episode_reward_mean: 132.87326977349596
  episode_reward_min: -53.522718499385675
  episodes_this_iter: 5
  episodes_total: 2528
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9245159029960632
          entropy_coeff: 0.0
          kl: 0.000211934995604679
          model: {}
          policy_loss: -0.001451037242077291
          total_loss: 3.2024073600769043
          vf_explained_var: 0.230708166

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,381,6763.02,1524000,132.873,249.992,-53.5227,910.46


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,381,6763.02,1524000,132.873,249.992,-53.5227,910.46


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,381,6763.02,1524000,132.873,249.992,-53.5227,910.46


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1528000
  counters:
    num_agent_steps_sampled: 1528000
    num_agent_steps_trained: 1528000
    num_env_steps_sampled: 1528000
    num_env_steps_trained: 1528000
  custom_metrics: {}
  date: 2022-07-23_23-02-46
  done: false
  episode_len_mean: 910.46
  episode_media: {}
  episode_reward_max: 249.99157292304199
  episode_reward_mean: 133.36829450783338
  episode_reward_min: -53.522718499385675
  episodes_this_iter: 4
  episodes_total: 2532
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 1.0272477865219116
          entropy_coeff: 0.0
          kl: 0.0040292683988809586
          model: {}
          policy_loss: -0.005197660531848669
          total_loss: 2.6995809078216553
          vf_explained_var: 0.24336363

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,382,6781.05,1528000,133.368,249.992,-53.5227,910.46


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,382,6781.05,1528000,133.368,249.992,-53.5227,910.46


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,382,6781.05,1528000,133.368,249.992,-53.5227,910.46


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1532000
  counters:
    num_agent_steps_sampled: 1532000
    num_agent_steps_trained: 1532000
    num_env_steps_sampled: 1532000
    num_env_steps_trained: 1532000
  custom_metrics: {}
  date: 2022-07-23_23-03-04
  done: false
  episode_len_mean: 930.29
  episode_media: {}
  episode_reward_max: 249.99157292304199
  episode_reward_mean: 135.84845404964597
  episode_reward_min: -25.22624469282637
  episodes_this_iter: 4
  episodes_total: 2536
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 1.031040072441101
          entropy_coeff: 0.0
          kl: 0.0035872466396540403
          model: {}
          policy_loss: -0.004063108470290899
          total_loss: 2.6719162464141846
          vf_explained_var: 0.1246598958

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,383,6799.09,1532000,135.848,249.992,-25.2262,930.29


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,383,6799.09,1532000,135.848,249.992,-25.2262,930.29


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,383,6799.09,1532000,135.848,249.992,-25.2262,930.29


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1536000
  counters:
    num_agent_steps_sampled: 1536000
    num_agent_steps_trained: 1536000
    num_env_steps_sampled: 1536000
    num_env_steps_trained: 1536000
  custom_metrics: {}
  date: 2022-07-23_23-03-22
  done: false
  episode_len_mean: 955.24
  episode_media: {}
  episode_reward_max: 249.99157292304199
  episode_reward_mean: 135.5250521738827
  episode_reward_min: -4.153285497633931
  episodes_this_iter: 4
  episodes_total: 2540
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 1.0233862400054932
          entropy_coeff: 0.0
          kl: 0.0007173223420977592
          model: {}
          policy_loss: -0.0036502364091575146
          total_loss: 2.546203374862671
          vf_explained_var: 0.1419195681

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,384,6817.28,1536000,135.525,249.992,-4.15329,955.24


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,384,6817.28,1536000,135.525,249.992,-4.15329,955.24


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,384,6817.28,1536000,135.525,249.992,-4.15329,955.24


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1540000
  counters:
    num_agent_steps_sampled: 1540000
    num_agent_steps_trained: 1540000
    num_env_steps_sampled: 1540000
    num_env_steps_trained: 1540000
  custom_metrics: {}
  date: 2022-07-23_23-03-41
  done: false
  episode_len_mean: 961.78
  episode_media: {}
  episode_reward_max: 249.99157292304199
  episode_reward_mean: 137.87658977973126
  episode_reward_min: -4.153285497633931
  episodes_this_iter: 4
  episodes_total: 2544
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 1.0352343320846558
          entropy_coeff: 0.0
          kl: 0.0011521534761413932
          model: {}
          policy_loss: -0.006181660108268261
          total_loss: 2.7588934898376465
          vf_explained_var: 0.242100492

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,385,6835.85,1540000,137.877,249.992,-4.15329,961.78


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,385,6835.85,1540000,137.877,249.992,-4.15329,961.78


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,385,6835.85,1540000,137.877,249.992,-4.15329,961.78


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1544000
  counters:
    num_agent_steps_sampled: 1544000
    num_agent_steps_trained: 1544000
    num_env_steps_sampled: 1544000
    num_env_steps_trained: 1544000
  custom_metrics: {}
  date: 2022-07-23_23-03-59
  done: false
  episode_len_mean: 961.78
  episode_media: {}
  episode_reward_max: 249.99157292304199
  episode_reward_mean: 139.07965927072527
  episode_reward_min: -4.153285497633931
  episodes_this_iter: 4
  episodes_total: 2548
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9953643679618835
          entropy_coeff: 0.0
          kl: 0.004296800587326288
          model: {}
          policy_loss: -0.005728201009333134
          total_loss: 3.1403121948242188
          vf_explained_var: 0.2872647941

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,386,6854.2,1544000,139.08,249.992,-4.15329,961.78


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,386,6854.2,1544000,139.08,249.992,-4.15329,961.78


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,386,6854.2,1544000,139.08,249.992,-4.15329,961.78


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1548000
  counters:
    num_agent_steps_sampled: 1548000
    num_agent_steps_trained: 1548000
    num_env_steps_sampled: 1548000
    num_env_steps_trained: 1548000
  custom_metrics: {}
  date: 2022-07-23_23-04-18
  done: false
  episode_len_mean: 955.31
  episode_media: {}
  episode_reward_max: 249.99157292304199
  episode_reward_mean: 141.29019719052084
  episode_reward_min: -4.153285497633931
  episodes_this_iter: 4
  episodes_total: 2552
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 1.0322118997573853
          entropy_coeff: 0.0
          kl: 0.006604188587516546
          model: {}
          policy_loss: -0.0040652090683579445
          total_loss: 3.278851270675659
          vf_explained_var: 0.4382933378

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,387,6872.9,1548000,141.29,249.992,-4.15329,955.31


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,387,6872.9,1548000,141.29,249.992,-4.15329,955.31


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,387,6872.9,1548000,141.29,249.992,-4.15329,955.31


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1552000
  counters:
    num_agent_steps_sampled: 1552000
    num_agent_steps_trained: 1552000
    num_env_steps_sampled: 1552000
    num_env_steps_trained: 1552000
  custom_metrics: {}
  date: 2022-07-23_23-04-37
  done: false
  episode_len_mean: 955.31
  episode_media: {}
  episode_reward_max: 249.99157292304199
  episode_reward_mean: 142.0478055555354
  episode_reward_min: -4.153285497633931
  episodes_this_iter: 4
  episodes_total: 2556
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 1.0615705251693726
          entropy_coeff: 0.0
          kl: 0.007532611954957247
          model: {}
          policy_loss: -0.0045924801379442215
          total_loss: 2.2801527976989746
          vf_explained_var: 0.4244184494

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,388,6891.95,1552000,142.048,249.992,-4.15329,955.31


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,388,6891.95,1552000,142.048,249.992,-4.15329,955.31


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,388,6891.95,1552000,142.048,249.992,-4.15329,955.31


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1556000
  counters:
    num_agent_steps_sampled: 1556000
    num_agent_steps_trained: 1556000
    num_env_steps_sampled: 1556000
    num_env_steps_trained: 1556000
  custom_metrics: {}
  date: 2022-07-23_23-04-56
  done: false
  episode_len_mean: 958.99
  episode_media: {}
  episode_reward_max: 249.99157292304199
  episode_reward_mean: 141.3606611301064
  episode_reward_min: -4.153285497633931
  episodes_this_iter: 4
  episodes_total: 2560
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9702754020690918
          entropy_coeff: 0.0
          kl: 0.0027447144966572523
          model: {}
          policy_loss: -0.004110435955226421
          total_loss: 2.7639012336730957
          vf_explained_var: 0.4635294675

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,389,6910.79,1556000,141.361,249.992,-4.15329,958.99


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,389,6910.79,1556000,141.361,249.992,-4.15329,958.99


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,389,6910.79,1556000,141.361,249.992,-4.15329,958.99


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1560000
  counters:
    num_agent_steps_sampled: 1560000
    num_agent_steps_trained: 1560000
    num_env_steps_sampled: 1560000
    num_env_steps_trained: 1560000
  custom_metrics: {}
  date: 2022-07-23_23-05-15
  done: false
  episode_len_mean: 951.26
  episode_media: {}
  episode_reward_max: 249.99157292304199
  episode_reward_mean: 140.85539036116305
  episode_reward_min: -4.153285497633931
  episodes_this_iter: 5
  episodes_total: 2565
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9901610016822815
          entropy_coeff: 0.0
          kl: 0.003133732359856367
          model: {}
          policy_loss: -0.00302401022054255
          total_loss: 2.60282564163208
          vf_explained_var: 0.4817534089088

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,390,6929.62,1560000,140.855,249.992,-4.15329,951.26


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,390,6929.62,1560000,140.855,249.992,-4.15329,951.26


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,390,6929.62,1560000,140.855,249.992,-4.15329,951.26


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1564000
  counters:
    num_agent_steps_sampled: 1564000
    num_agent_steps_trained: 1564000
    num_env_steps_sampled: 1564000
    num_env_steps_trained: 1564000
  custom_metrics: {}
  date: 2022-07-23_23-05-33
  done: false
  episode_len_mean: 951.26
  episode_media: {}
  episode_reward_max: 249.99157292304199
  episode_reward_mean: 142.08972681679154
  episode_reward_min: -4.153285497633931
  episodes_this_iter: 4
  episodes_total: 2569
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 1.0524775981903076
          entropy_coeff: 0.0
          kl: 0.01679551787674427
          model: {}
          policy_loss: -0.008131138049066067
          total_loss: 2.8633999824523926
          vf_explained_var: 0.44874188303

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,391,6947.69,1564000,142.09,249.992,-4.15329,951.26


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,391,6947.69,1564000,142.09,249.992,-4.15329,951.26


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,391,6947.69,1564000,142.09,249.992,-4.15329,951.26


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1568000
  counters:
    num_agent_steps_sampled: 1568000
    num_agent_steps_trained: 1568000
    num_env_steps_sampled: 1568000
    num_env_steps_trained: 1568000
  custom_metrics: {}
  date: 2022-07-23_23-05-52
  done: false
  episode_len_mean: 946.68
  episode_media: {}
  episode_reward_max: 249.99157292304199
  episode_reward_mean: 141.95765271257608
  episode_reward_min: -4.153285497633931
  episodes_this_iter: 5
  episodes_total: 2574
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9487119317054749
          entropy_coeff: 0.0
          kl: 0.002341042272746563
          model: {}
          policy_loss: -0.001998450607061386
          total_loss: 3.1855523586273193
          vf_explained_var: 0.2835648655

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,392,6966.68,1568000,141.958,249.992,-4.15329,946.68


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,392,6966.68,1568000,141.958,249.992,-4.15329,946.68


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,392,6966.68,1568000,141.958,249.992,-4.15329,946.68


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1572000
  counters:
    num_agent_steps_sampled: 1572000
    num_agent_steps_trained: 1572000
    num_env_steps_sampled: 1572000
    num_env_steps_trained: 1572000
  custom_metrics: {}
  date: 2022-07-23_23-06-11
  done: false
  episode_len_mean: 946.68
  episode_media: {}
  episode_reward_max: 249.99157292304199
  episode_reward_mean: 142.46337050849274
  episode_reward_min: -4.153285497633931
  episodes_this_iter: 4
  episodes_total: 2578
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8799576163291931
          entropy_coeff: 0.0
          kl: 0.007784219924360514
          model: {}
          policy_loss: -0.004512927494943142
          total_loss: 3.4238529205322266
          vf_explained_var: 0.5106375217

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,393,6985.39,1572000,142.463,249.992,-4.15329,946.68


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,393,6985.39,1572000,142.463,249.992,-4.15329,946.68


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,393,6985.39,1572000,142.463,249.992,-4.15329,946.68


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1576000
  counters:
    num_agent_steps_sampled: 1576000
    num_agent_steps_trained: 1576000
    num_env_steps_sampled: 1576000
    num_env_steps_trained: 1576000
  custom_metrics: {}
  date: 2022-07-23_23-06-30
  done: false
  episode_len_mean: 939.2
  episode_media: {}
  episode_reward_max: 249.99157292304199
  episode_reward_mean: 140.18559861787628
  episode_reward_min: -19.50457558623573
  episodes_this_iter: 4
  episodes_total: 2582
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 1.0420358180999756
          entropy_coeff: 0.0
          kl: 0.002598217921331525
          model: {}
          policy_loss: -0.0022383476607501507
          total_loss: 2.846332311630249
          vf_explained_var: 0.30311757326

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,394,7004.75,1576000,140.186,249.992,-19.5046,939.2


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,394,7004.75,1576000,140.186,249.992,-19.5046,939.2


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,394,7004.75,1576000,140.186,249.992,-19.5046,939.2


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,394,7004.75,1576000,140.186,249.992,-19.5046,939.2


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1580000
  counters:
    num_agent_steps_sampled: 1580000
    num_agent_steps_trained: 1580000
    num_env_steps_sampled: 1580000
    num_env_steps_trained: 1580000
  custom_metrics: {}
  date: 2022-07-23_23-06-51
  done: false
  episode_len_mean: 944.93
  episode_media: {}
  episode_reward_max: 249.99157292304199
  episode_reward_mean: 140.18181570899364
  episode_reward_min: -19.50457558623573
  episodes_this_iter: 5
  episodes_total: 2587
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 1.0009686946868896
          entropy_coeff: 0.0
          kl: 0.006512829568237066
          model: {}
          policy_loss: -0.0015725988196209073
          total_loss: 3.8649377822875977
          vf_explained_var: 0.290965378

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,395,7025.56,1580000,140.182,249.992,-19.5046,944.93


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,395,7025.56,1580000,140.182,249.992,-19.5046,944.93


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,395,7025.56,1580000,140.182,249.992,-19.5046,944.93


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1584000
  counters:
    num_agent_steps_sampled: 1584000
    num_agent_steps_trained: 1584000
    num_env_steps_sampled: 1584000
    num_env_steps_trained: 1584000
  custom_metrics: {}
  date: 2022-07-23_23-07-11
  done: false
  episode_len_mean: 937.61
  episode_media: {}
  episode_reward_max: 249.99157292304199
  episode_reward_mean: 139.08840393822177
  episode_reward_min: -19.50457558623573
  episodes_this_iter: 5
  episodes_total: 2592
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 1.036407709121704
          entropy_coeff: 0.0
          kl: 0.009163406677544117
          model: {}
          policy_loss: -0.004274848848581314
          total_loss: 3.209446430206299
          vf_explained_var: 0.521938323974

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,396,7045.15,1584000,139.088,249.992,-19.5046,937.61


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,396,7045.15,1584000,139.088,249.992,-19.5046,937.61


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,396,7045.15,1584000,139.088,249.992,-19.5046,937.61


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1588000
  counters:
    num_agent_steps_sampled: 1588000
    num_agent_steps_trained: 1588000
    num_env_steps_sampled: 1588000
    num_env_steps_trained: 1588000
  custom_metrics: {}
  date: 2022-07-23_23-07-30
  done: false
  episode_len_mean: 937.61
  episode_media: {}
  episode_reward_max: 249.99157292304199
  episode_reward_mean: 139.47909055218
  episode_reward_min: -19.50457558623573
  episodes_this_iter: 4
  episodes_total: 2596
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 1.0437439680099487
          entropy_coeff: 0.0
          kl: 0.004697206895798445
          model: {}
          policy_loss: -0.005769453477114439
          total_loss: 2.7817745208740234
          vf_explained_var: 0.4815301895141

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,397,7064.23,1588000,139.479,249.992,-19.5046,937.61


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,397,7064.23,1588000,139.479,249.992,-19.5046,937.61


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,397,7064.23,1588000,139.479,249.992,-19.5046,937.61


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1592000
  counters:
    num_agent_steps_sampled: 1592000
    num_agent_steps_trained: 1592000
    num_env_steps_sampled: 1592000
    num_env_steps_trained: 1592000
  custom_metrics: {}
  date: 2022-07-23_23-07-50
  done: false
  episode_len_mean: 923.99
  episode_media: {}
  episode_reward_max: 249.99157292304199
  episode_reward_mean: 138.61254705986778
  episode_reward_min: -19.50457558623573
  episodes_this_iter: 6
  episodes_total: 2602
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8918614387512207
          entropy_coeff: 0.0
          kl: 0.0031081782653927803
          model: {}
          policy_loss: -0.002316008787602186
          total_loss: 3.9714319705963135
          vf_explained_var: 0.472954213

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,398,7083.81,1592000,138.613,249.992,-19.5046,923.99


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,398,7083.81,1592000,138.613,249.992,-19.5046,923.99


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,398,7083.81,1592000,138.613,249.992,-19.5046,923.99


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1596000
  counters:
    num_agent_steps_sampled: 1596000
    num_agent_steps_trained: 1596000
    num_env_steps_sampled: 1596000
    num_env_steps_trained: 1596000
  custom_metrics: {}
  date: 2022-07-23_23-08-08
  done: false
  episode_len_mean: 916.87
  episode_media: {}
  episode_reward_max: 249.99157292304199
  episode_reward_mean: 137.15173200243237
  episode_reward_min: -30.73161188045661
  episodes_this_iter: 6
  episodes_total: 2608
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9740394949913025
          entropy_coeff: 0.0
          kl: 0.0015773903578519821
          model: {}
          policy_loss: -0.006042927969247103
          total_loss: 3.661362886428833
          vf_explained_var: 0.2685893774

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,399,7102.3,1596000,137.152,249.992,-30.7316,916.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,399,7102.3,1596000,137.152,249.992,-30.7316,916.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,399,7102.3,1596000,137.152,249.992,-30.7316,916.87


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1600000
  counters:
    num_agent_steps_sampled: 1600000
    num_agent_steps_trained: 1600000
    num_env_steps_sampled: 1600000
    num_env_steps_trained: 1600000
  custom_metrics: {}
  date: 2022-07-23_23-08-27
  done: false
  episode_len_mean: 902.68
  episode_media: {}
  episode_reward_max: 249.99157292304199
  episode_reward_mean: 136.78820229786427
  episode_reward_min: -30.73161188045661
  episodes_this_iter: 4
  episodes_total: 2612
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 1.008626937866211
          entropy_coeff: 0.0
          kl: 0.002720350632444024
          model: {}
          policy_loss: -0.0013233715435490012
          total_loss: 3.6977009773254395
          vf_explained_var: 0.2999020218

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,400,7121.02,1600000,136.788,249.992,-30.7316,902.68


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,400,7121.02,1600000,136.788,249.992,-30.7316,902.68


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,400,7121.02,1600000,136.788,249.992,-30.7316,902.68


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1604000
  counters:
    num_agent_steps_sampled: 1604000
    num_agent_steps_trained: 1604000
    num_env_steps_sampled: 1604000
    num_env_steps_trained: 1604000
  custom_metrics: {}
  date: 2022-07-23_23-08-46
  done: false
  episode_len_mean: 895.24
  episode_media: {}
  episode_reward_max: 249.99157292304199
  episode_reward_mean: 135.1316223425867
  episode_reward_min: -55.029733284653304
  episodes_this_iter: 5
  episodes_total: 2617
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 1.0120537281036377
          entropy_coeff: 0.0
          kl: 0.002007654868066311
          model: {}
          policy_loss: -0.0017319994512945414
          total_loss: 3.379424571990967
          vf_explained_var: 0.2919428944

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,401,7139.97,1604000,135.132,249.992,-55.0297,895.24


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,401,7139.97,1604000,135.132,249.992,-55.0297,895.24


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,401,7139.97,1604000,135.132,249.992,-55.0297,895.24


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1608000
  counters:
    num_agent_steps_sampled: 1608000
    num_agent_steps_trained: 1608000
    num_env_steps_sampled: 1608000
    num_env_steps_trained: 1608000
  custom_metrics: {}
  date: 2022-07-23_23-09-06
  done: false
  episode_len_mean: 880.82
  episode_media: {}
  episode_reward_max: 249.99157292304199
  episode_reward_mean: 134.4861406409484
  episode_reward_min: -55.029733284653304
  episodes_this_iter: 6
  episodes_total: 2623
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8359123468399048
          entropy_coeff: 0.0
          kl: 0.0038321407046169043
          model: {}
          policy_loss: -0.0016848676605150104
          total_loss: 4.5151567459106445
          vf_explained_var: 0.34668898

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,402,7159.71,1608000,134.486,249.992,-55.0297,880.82


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,402,7159.71,1608000,134.486,249.992,-55.0297,880.82


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,402,7159.71,1608000,134.486,249.992,-55.0297,880.82


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1612000
  counters:
    num_agent_steps_sampled: 1612000
    num_agent_steps_trained: 1612000
    num_env_steps_sampled: 1612000
    num_env_steps_trained: 1612000
  custom_metrics: {}
  date: 2022-07-23_23-09-25
  done: false
  episode_len_mean: 888.96
  episode_media: {}
  episode_reward_max: 234.72668487642215
  episode_reward_mean: 134.14551978980205
  episode_reward_min: -55.029733284653304
  episodes_this_iter: 4
  episodes_total: 2627
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 1.0071194171905518
          entropy_coeff: 0.0
          kl: 0.006531245540827513
          model: {}
          policy_loss: -0.004598574712872505
          total_loss: 3.4561896324157715
          vf_explained_var: 0.417512089

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,403,7178.59,1612000,134.146,234.727,-55.0297,888.96


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,403,7178.59,1612000,134.146,234.727,-55.0297,888.96


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,403,7178.59,1612000,134.146,234.727,-55.0297,888.96


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1616000
  counters:
    num_agent_steps_sampled: 1616000
    num_agent_steps_trained: 1616000
    num_env_steps_sampled: 1616000
    num_env_steps_trained: 1616000
  custom_metrics: {}
  date: 2022-07-23_23-09-44
  done: false
  episode_len_mean: 881.49
  episode_media: {}
  episode_reward_max: 234.72668487642215
  episode_reward_mean: 134.41645564139273
  episode_reward_min: -55.029733284653304
  episodes_this_iter: 5
  episodes_total: 2632
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9883821606636047
          entropy_coeff: 0.0
          kl: 0.0008194570546038449
          model: {}
          policy_loss: -0.007853575050830841
          total_loss: 2.647611141204834
          vf_explained_var: 0.375441104

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,404,7197.4,1616000,134.416,234.727,-55.0297,881.49


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,404,7197.4,1616000,134.416,234.727,-55.0297,881.49


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,404,7197.4,1616000,134.416,234.727,-55.0297,881.49


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1620000
  counters:
    num_agent_steps_sampled: 1620000
    num_agent_steps_trained: 1620000
    num_env_steps_sampled: 1620000
    num_env_steps_trained: 1620000
  custom_metrics: {}
  date: 2022-07-23_23-10-03
  done: false
  episode_len_mean: 881.49
  episode_media: {}
  episode_reward_max: 234.72668487642215
  episode_reward_mean: 134.05014666803027
  episode_reward_min: -55.029733284653304
  episodes_this_iter: 4
  episodes_total: 2636
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8888417482376099
          entropy_coeff: 0.0
          kl: 0.003047999693080783
          model: {}
          policy_loss: -0.011486002244055271
          total_loss: 2.445289373397827
          vf_explained_var: 0.3307913541

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,405,7216.59,1620000,134.05,234.727,-55.0297,881.49


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,405,7216.59,1620000,134.05,234.727,-55.0297,881.49


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,405,7216.59,1620000,134.05,234.727,-55.0297,881.49


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1624000
  counters:
    num_agent_steps_sampled: 1624000
    num_agent_steps_trained: 1624000
    num_env_steps_sampled: 1624000
    num_env_steps_trained: 1624000
  custom_metrics: {}
  date: 2022-07-23_23-10-22
  done: false
  episode_len_mean: 881.49
  episode_media: {}
  episode_reward_max: 234.72668487642215
  episode_reward_mean: 133.55612076082255
  episode_reward_min: -55.029733284653304
  episodes_this_iter: 4
  episodes_total: 2640
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 1.0189117193222046
          entropy_coeff: 0.0
          kl: 0.001642022281885147
          model: {}
          policy_loss: -0.0027765287086367607
          total_loss: 2.7990875244140625
          vf_explained_var: 0.33659577

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,406,7235.84,1624000,133.556,234.727,-55.0297,881.49


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,406,7235.84,1624000,133.556,234.727,-55.0297,881.49


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,406,7235.84,1624000,133.556,234.727,-55.0297,881.49


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1628000
  counters:
    num_agent_steps_sampled: 1628000
    num_agent_steps_trained: 1628000
    num_env_steps_sampled: 1628000
    num_env_steps_trained: 1628000
  custom_metrics: {}
  date: 2022-07-23_23-10-42
  done: false
  episode_len_mean: 881.49
  episode_media: {}
  episode_reward_max: 234.72668487642215
  episode_reward_mean: 133.91670757207882
  episode_reward_min: -55.029733284653304
  episodes_this_iter: 4
  episodes_total: 2644
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9836387634277344
          entropy_coeff: 0.0
          kl: 0.008558623492717743
          model: {}
          policy_loss: -0.012712135910987854
          total_loss: 3.9160780906677246
          vf_explained_var: 0.353812098

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,407,7255.33,1628000,133.917,234.727,-55.0297,881.49


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,407,7255.33,1628000,133.917,234.727,-55.0297,881.49


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,407,7255.33,1628000,133.917,234.727,-55.0297,881.49


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,407,7255.33,1628000,133.917,234.727,-55.0297,881.49


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1632000
  counters:
    num_agent_steps_sampled: 1632000
    num_agent_steps_trained: 1632000
    num_env_steps_sampled: 1632000
    num_env_steps_trained: 1632000
  custom_metrics: {}
  date: 2022-07-23_23-11-03
  done: false
  episode_len_mean: 873.81
  episode_media: {}
  episode_reward_max: 234.72668487642215
  episode_reward_mean: 132.52324083479834
  episode_reward_min: -55.029733284653304
  episodes_this_iter: 4
  episodes_total: 2648
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9649996161460876
          entropy_coeff: 0.0
          kl: 0.0009683047537691891
          model: {}
          policy_loss: -0.002850971883162856
          total_loss: 2.678607225418091
          vf_explained_var: 0.352185875

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,408,7276.15,1632000,132.523,234.727,-55.0297,873.81


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,408,7276.15,1632000,132.523,234.727,-55.0297,873.81


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,408,7276.15,1632000,132.523,234.727,-55.0297,873.81


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1636000
  counters:
    num_agent_steps_sampled: 1636000
    num_agent_steps_trained: 1636000
    num_env_steps_sampled: 1636000
    num_env_steps_trained: 1636000
  custom_metrics: {}
  date: 2022-07-23_23-11-22
  done: false
  episode_len_mean: 841.39
  episode_media: {}
  episode_reward_max: 223.82396394486398
  episode_reward_mean: 124.44233573345706
  episode_reward_min: -69.45520056418937
  episodes_this_iter: 8
  episodes_total: 2656
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8437520861625671
          entropy_coeff: 0.0
          kl: 0.002687303116545081
          model: {}
          policy_loss: -0.001451590214855969
          total_loss: 4.051733493804932
          vf_explained_var: 0.24258849024

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,409,7295.39,1636000,124.442,223.824,-69.4552,841.39


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,409,7295.39,1636000,124.442,223.824,-69.4552,841.39


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,409,7295.39,1636000,124.442,223.824,-69.4552,841.39


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1640000
  counters:
    num_agent_steps_sampled: 1640000
    num_agent_steps_trained: 1640000
    num_env_steps_sampled: 1640000
    num_env_steps_trained: 1640000
  custom_metrics: {}
  date: 2022-07-23_23-11-41
  done: false
  episode_len_mean: 833.16
  episode_media: {}
  episode_reward_max: 223.82396394486398
  episode_reward_mean: 123.10673560470401
  episode_reward_min: -69.45520056418937
  episodes_this_iter: 5
  episodes_total: 2661
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9172152876853943
          entropy_coeff: 0.0
          kl: 0.003197015728801489
          model: {}
          policy_loss: 0.0013472975697368383
          total_loss: 3.9983880519866943
          vf_explained_var: 0.4820930063

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,410,7314.68,1640000,123.107,223.824,-69.4552,833.16


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,410,7314.68,1640000,123.107,223.824,-69.4552,833.16


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,410,7314.68,1640000,123.107,223.824,-69.4552,833.16


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1644000
  counters:
    num_agent_steps_sampled: 1644000
    num_agent_steps_trained: 1644000
    num_env_steps_sampled: 1644000
    num_env_steps_trained: 1644000
  custom_metrics: {}
  date: 2022-07-23_23-12-01
  done: false
  episode_len_mean: 833.58
  episode_media: {}
  episode_reward_max: 239.6923255287939
  episode_reward_mean: 125.05843133551042
  episode_reward_min: -69.45520056418937
  episodes_this_iter: 5
  episodes_total: 2666
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9503711462020874
          entropy_coeff: 0.0
          kl: 0.008846966549754143
          model: {}
          policy_loss: -0.004069024231284857
          total_loss: 3.110940933227539
          vf_explained_var: 0.376447051763

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,411,7334.06,1644000,125.058,239.692,-69.4552,833.58


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,411,7334.06,1644000,125.058,239.692,-69.4552,833.58


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,411,7334.06,1644000,125.058,239.692,-69.4552,833.58


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1648000
  counters:
    num_agent_steps_sampled: 1648000
    num_agent_steps_trained: 1648000
    num_env_steps_sampled: 1648000
    num_env_steps_trained: 1648000
  custom_metrics: {}
  date: 2022-07-23_23-12-20
  done: false
  episode_len_mean: 805.94
  episode_media: {}
  episode_reward_max: 239.6923255287939
  episode_reward_mean: 121.31851505650938
  episode_reward_min: -69.45520056418937
  episodes_this_iter: 7
  episodes_total: 2673
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9376656413078308
          entropy_coeff: 0.0
          kl: 0.0033618176821619272
          model: {}
          policy_loss: -0.002314456505700946
          total_loss: 4.757153034210205
          vf_explained_var: 0.40118238329

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,412,7353.04,1648000,121.319,239.692,-69.4552,805.94


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,412,7353.04,1648000,121.319,239.692,-69.4552,805.94


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,412,7353.04,1648000,121.319,239.692,-69.4552,805.94


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1652000
  counters:
    num_agent_steps_sampled: 1652000
    num_agent_steps_trained: 1652000
    num_env_steps_sampled: 1652000
    num_env_steps_trained: 1652000
  custom_metrics: {}
  date: 2022-07-23_23-12-40
  done: false
  episode_len_mean: 813.48
  episode_media: {}
  episode_reward_max: 239.6923255287939
  episode_reward_mean: 121.63745889858006
  episode_reward_min: -69.45520056418937
  episodes_this_iter: 4
  episodes_total: 2677
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9450752139091492
          entropy_coeff: 0.0
          kl: 0.0045359390787780285
          model: {}
          policy_loss: -0.0045789009891450405
          total_loss: 2.2349584102630615
          vf_explained_var: 0.381279408

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,413,7373.07,1652000,121.637,239.692,-69.4552,813.48


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,413,7373.07,1652000,121.637,239.692,-69.4552,813.48


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,413,7373.07,1652000,121.637,239.692,-69.4552,813.48


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1656000
  counters:
    num_agent_steps_sampled: 1656000
    num_agent_steps_trained: 1656000
    num_env_steps_sampled: 1656000
    num_env_steps_trained: 1656000
  custom_metrics: {}
  date: 2022-07-23_23-12-59
  done: false
  episode_len_mean: 812.78
  episode_media: {}
  episode_reward_max: 239.6923255287939
  episode_reward_mean: 123.39787699915834
  episode_reward_min: -69.45520056418937
  episodes_this_iter: 5
  episodes_total: 2682
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9394563436508179
          entropy_coeff: 0.0
          kl: 0.001758253201842308
          model: {}
          policy_loss: -0.003017968498170376
          total_loss: 2.676344633102417
          vf_explained_var: 0.355588912963

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,414,7392.33,1656000,123.398,239.692,-69.4552,812.78


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,414,7392.33,1656000,123.398,239.692,-69.4552,812.78


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,414,7392.33,1656000,123.398,239.692,-69.4552,812.78


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1660000
  counters:
    num_agent_steps_sampled: 1660000
    num_agent_steps_trained: 1660000
    num_env_steps_sampled: 1660000
    num_env_steps_trained: 1660000
  custom_metrics: {}
  date: 2022-07-23_23-13-19
  done: false
  episode_len_mean: 773.51
  episode_media: {}
  episode_reward_max: 239.6923255287939
  episode_reward_mean: 118.87651329208194
  episode_reward_min: -69.45520056418937
  episodes_this_iter: 8
  episodes_total: 2690
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8827882409095764
          entropy_coeff: 0.0
          kl: 0.007821436040103436
          model: {}
          policy_loss: -0.0016506731044501066
          total_loss: 4.889160633087158
          vf_explained_var: 0.26768583059

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,415,7411.79,1660000,118.877,239.692,-69.4552,773.51


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,415,7411.79,1660000,118.877,239.692,-69.4552,773.51


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,415,7411.79,1660000,118.877,239.692,-69.4552,773.51


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1664000
  counters:
    num_agent_steps_sampled: 1664000
    num_agent_steps_trained: 1664000
    num_env_steps_sampled: 1664000
    num_env_steps_trained: 1664000
  custom_metrics: {}
  date: 2022-07-23_23-13-39
  done: false
  episode_len_mean: 780.83
  episode_media: {}
  episode_reward_max: 239.6923255287939
  episode_reward_mean: 120.02535434702642
  episode_reward_min: -69.45520056418937
  episodes_this_iter: 4
  episodes_total: 2694
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.853655219078064
          entropy_coeff: 0.0
          kl: 0.0008294821018353105
          model: {}
          policy_loss: -0.002542683854699135
          total_loss: 3.0404579639434814
          vf_explained_var: 0.37572222948

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,416,7431.55,1664000,120.025,239.692,-69.4552,780.83


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,416,7431.55,1664000,120.025,239.692,-69.4552,780.83


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,416,7431.55,1664000,120.025,239.692,-69.4552,780.83


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1668000
  counters:
    num_agent_steps_sampled: 1668000
    num_agent_steps_trained: 1668000
    num_env_steps_sampled: 1668000
    num_env_steps_trained: 1668000
  custom_metrics: {}
  date: 2022-07-23_23-13-57
  done: false
  episode_len_mean: 763.91
  episode_media: {}
  episode_reward_max: 239.6923255287939
  episode_reward_mean: 117.76589581770855
  episode_reward_min: -69.45520056418937
  episodes_this_iter: 7
  episodes_total: 2701
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.901192307472229
          entropy_coeff: 0.0
          kl: 0.0029544855933636427
          model: {}
          policy_loss: -0.0016498465556651354
          total_loss: 4.163946151733398
          vf_explained_var: 0.34507849812

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,417,7450.3,1668000,117.766,239.692,-69.4552,763.91


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,417,7450.3,1668000,117.766,239.692,-69.4552,763.91


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,417,7450.3,1668000,117.766,239.692,-69.4552,763.91


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1672000
  counters:
    num_agent_steps_sampled: 1672000
    num_agent_steps_trained: 1672000
    num_env_steps_sampled: 1672000
    num_env_steps_trained: 1672000
  custom_metrics: {}
  date: 2022-07-23_23-14-16
  done: false
  episode_len_mean: 757.97
  episode_media: {}
  episode_reward_max: 241.0312117431262
  episode_reward_mean: 119.19176977513808
  episode_reward_min: -69.45520056418937
  episodes_this_iter: 5
  episodes_total: 2706
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9056349396705627
          entropy_coeff: 0.0
          kl: 0.011033591814339161
          model: {}
          policy_loss: -0.004415722098201513
          total_loss: 3.224905490875244
          vf_explained_var: 0.279603391885

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,418,7469.04,1672000,119.192,241.031,-69.4552,757.97


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,418,7469.04,1672000,119.192,241.031,-69.4552,757.97


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,418,7469.04,1672000,119.192,241.031,-69.4552,757.97


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1676000
  counters:
    num_agent_steps_sampled: 1676000
    num_agent_steps_trained: 1676000
    num_env_steps_sampled: 1676000
    num_env_steps_trained: 1676000
  custom_metrics: {}
  date: 2022-07-23_23-14-35
  done: false
  episode_len_mean: 763.34
  episode_media: {}
  episode_reward_max: 241.0312117431262
  episode_reward_mean: 116.23731461956497
  episode_reward_min: -69.45520056418937
  episodes_this_iter: 6
  episodes_total: 2712
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8649399876594543
          entropy_coeff: 0.0
          kl: 0.0015930063091218472
          model: {}
          policy_loss: -0.0010785702615976334
          total_loss: 3.2139933109283447
          vf_explained_var: 0.309266060

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,419,7487.72,1676000,116.237,241.031,-69.4552,763.34


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,419,7487.72,1676000,116.237,241.031,-69.4552,763.34


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,419,7487.72,1676000,116.237,241.031,-69.4552,763.34


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1680000
  counters:
    num_agent_steps_sampled: 1680000
    num_agent_steps_trained: 1680000
    num_env_steps_sampled: 1680000
    num_env_steps_trained: 1680000
  custom_metrics: {}
  date: 2022-07-23_23-14-54
  done: false
  episode_len_mean: 770.78
  episode_media: {}
  episode_reward_max: 241.0312117431262
  episode_reward_mean: 118.08638777180084
  episode_reward_min: -69.45520056418937
  episodes_this_iter: 4
  episodes_total: 2716
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8853280544281006
          entropy_coeff: 0.0
          kl: 0.004256091080605984
          model: {}
          policy_loss: -0.004684118553996086
          total_loss: 3.8456594944000244
          vf_explained_var: 0.45804572105

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,420,7506.71,1680000,118.086,241.031,-69.4552,770.78


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,420,7506.71,1680000,118.086,241.031,-69.4552,770.78


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,420,7506.71,1680000,118.086,241.031,-69.4552,770.78


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1684000
  counters:
    num_agent_steps_sampled: 1684000
    num_agent_steps_trained: 1684000
    num_env_steps_sampled: 1684000
    num_env_steps_trained: 1684000
  custom_metrics: {}
  date: 2022-07-23_23-15-13
  done: false
  episode_len_mean: 771.1
  episode_media: {}
  episode_reward_max: 241.0312117431262
  episode_reward_mean: 119.53474665928579
  episode_reward_min: -69.45520056418937
  episodes_this_iter: 5
  episodes_total: 2721
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8212100267410278
          entropy_coeff: 0.0
          kl: 0.0006067670765332878
          model: {}
          policy_loss: -0.00016205002611968666
          total_loss: 3.450139045715332
          vf_explained_var: 0.4551112353

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,421,7525.86,1684000,119.535,241.031,-69.4552,771.1


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,421,7525.86,1684000,119.535,241.031,-69.4552,771.1


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,421,7525.86,1684000,119.535,241.031,-69.4552,771.1


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1688000
  counters:
    num_agent_steps_sampled: 1688000
    num_agent_steps_trained: 1688000
    num_env_steps_sampled: 1688000
    num_env_steps_trained: 1688000
  custom_metrics: {}
  date: 2022-07-23_23-15-33
  done: false
  episode_len_mean: 771.46
  episode_media: {}
  episode_reward_max: 247.0726710492794
  episode_reward_mean: 120.88436614603299
  episode_reward_min: -69.45520056418937
  episodes_this_iter: 5
  episodes_total: 2726
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8730435967445374
          entropy_coeff: 0.0
          kl: 0.00290055014193058
          model: {}
          policy_loss: -0.004253074061125517
          total_loss: 3.6572606563568115
          vf_explained_var: 0.304045259952

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,422,7545.36,1688000,120.884,247.073,-69.4552,771.46


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,422,7545.36,1688000,120.884,247.073,-69.4552,771.46


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,422,7545.36,1688000,120.884,247.073,-69.4552,771.46


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1692000
  counters:
    num_agent_steps_sampled: 1692000
    num_agent_steps_trained: 1692000
    num_env_steps_sampled: 1692000
    num_env_steps_trained: 1692000
  custom_metrics: {}
  date: 2022-07-23_23-15-52
  done: false
  episode_len_mean: 771.46
  episode_media: {}
  episode_reward_max: 247.0726710492794
  episode_reward_mean: 121.45935332441248
  episode_reward_min: -69.45520056418937
  episodes_this_iter: 4
  episodes_total: 2730
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8807191848754883
          entropy_coeff: 0.0
          kl: 0.0013159082736819983
          model: {}
          policy_loss: 0.0075247157365083694
          total_loss: 2.544297695159912
          vf_explained_var: 0.19097153842

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,423,7564.9,1692000,121.459,247.073,-69.4552,771.46


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,423,7564.9,1692000,121.459,247.073,-69.4552,771.46


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,423,7564.9,1692000,121.459,247.073,-69.4552,771.46


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1696000
  counters:
    num_agent_steps_sampled: 1696000
    num_agent_steps_trained: 1696000
    num_env_steps_sampled: 1696000
    num_env_steps_trained: 1696000
  custom_metrics: {}
  date: 2022-07-23_23-16-11
  done: false
  episode_len_mean: 771.07
  episode_media: {}
  episode_reward_max: 247.0726710492794
  episode_reward_mean: 120.121646114845
  episode_reward_min: -69.45520056418937
  episodes_this_iter: 4
  episodes_total: 2734
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9356227517127991
          entropy_coeff: 0.0
          kl: 0.003742396365851164
          model: {}
          policy_loss: 0.0003380970156285912
          total_loss: 3.3943710327148438
          vf_explained_var: 0.3713470697402

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,424,7583.57,1696000,120.122,247.073,-69.4552,771.07


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,424,7583.57,1696000,120.122,247.073,-69.4552,771.07


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,424,7583.57,1696000,120.122,247.073,-69.4552,771.07


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1700000
  counters:
    num_agent_steps_sampled: 1700000
    num_agent_steps_trained: 1700000
    num_env_steps_sampled: 1700000
    num_env_steps_trained: 1700000
  custom_metrics: {}
  date: 2022-07-23_23-16-30
  done: false
  episode_len_mean: 764.92
  episode_media: {}
  episode_reward_max: 247.0726710492794
  episode_reward_mean: 120.99655954704049
  episode_reward_min: -69.45520056418937
  episodes_this_iter: 5
  episodes_total: 2739
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8372118473052979
          entropy_coeff: 0.0
          kl: 0.003748062066733837
          model: {}
          policy_loss: 0.015383307822048664
          total_loss: 3.276946544647217
          vf_explained_var: 0.3736066818237

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,425,7602.54,1700000,120.997,247.073,-69.4552,764.92


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,425,7602.54,1700000,120.997,247.073,-69.4552,764.92


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,425,7602.54,1700000,120.997,247.073,-69.4552,764.92


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1704000
  counters:
    num_agent_steps_sampled: 1704000
    num_agent_steps_trained: 1704000
    num_env_steps_sampled: 1704000
    num_env_steps_trained: 1704000
  custom_metrics: {}
  date: 2022-07-23_23-16-50
  done: false
  episode_len_mean: 764.92
  episode_media: {}
  episode_reward_max: 247.0726710492794
  episode_reward_mean: 120.28168304827518
  episode_reward_min: -69.45520056418937
  episodes_this_iter: 4
  episodes_total: 2743
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8222114443778992
          entropy_coeff: 0.0
          kl: 0.0020933840423822403
          model: {}
          policy_loss: -0.0032478338107466698
          total_loss: 3.2624406814575195
          vf_explained_var: 0.364956825

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,426,7622.28,1704000,120.282,247.073,-69.4552,764.92


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,426,7622.28,1704000,120.282,247.073,-69.4552,764.92


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,426,7622.28,1704000,120.282,247.073,-69.4552,764.92


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1708000
  counters:
    num_agent_steps_sampled: 1708000
    num_agent_steps_trained: 1708000
    num_env_steps_sampled: 1708000
    num_env_steps_trained: 1708000
  custom_metrics: {}
  date: 2022-07-23_23-17-10
  done: false
  episode_len_mean: 772.6
  episode_media: {}
  episode_reward_max: 247.0726710492794
  episode_reward_mean: 120.7982590573708
  episode_reward_min: -69.45520056418937
  episodes_this_iter: 4
  episodes_total: 2747
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9081649780273438
          entropy_coeff: 0.0
          kl: 0.002023317851126194
          model: {}
          policy_loss: -0.0016335573745891452
          total_loss: 3.1568522453308105
          vf_explained_var: 0.399905771017

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,427,7641.99,1708000,120.798,247.073,-69.4552,772.6


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,427,7641.99,1708000,120.798,247.073,-69.4552,772.6


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,427,7641.99,1708000,120.798,247.073,-69.4552,772.6


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1712000
  counters:
    num_agent_steps_sampled: 1712000
    num_agent_steps_trained: 1712000
    num_env_steps_sampled: 1712000
    num_env_steps_trained: 1712000
  custom_metrics: {}
  date: 2022-07-23_23-17-30
  done: false
  episode_len_mean: 788.15
  episode_media: {}
  episode_reward_max: 247.0726710492794
  episode_reward_mean: 124.44836743123315
  episode_reward_min: -32.2051835038023
  episodes_this_iter: 4
  episodes_total: 2751
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9302623867988586
          entropy_coeff: 0.0
          kl: 0.004079914651811123
          model: {}
          policy_loss: 0.0055472650565207005
          total_loss: 2.887397527694702
          vf_explained_var: 0.3935837745666

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,428,7662.03,1712000,124.448,247.073,-32.2052,788.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,428,7662.03,1712000,124.448,247.073,-32.2052,788.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,428,7662.03,1712000,124.448,247.073,-32.2052,788.15


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,428,7662.03,1712000,124.448,247.073,-32.2052,788.15


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1716000
  counters:
    num_agent_steps_sampled: 1716000
    num_agent_steps_trained: 1716000
    num_env_steps_sampled: 1716000
    num_env_steps_trained: 1716000
  custom_metrics: {}
  date: 2022-07-23_23-17-50
  done: false
  episode_len_mean: 811.49
  episode_media: {}
  episode_reward_max: 247.0726710492794
  episode_reward_mean: 127.24073478612684
  episode_reward_min: -26.711698320221984
  episodes_this_iter: 4
  episodes_total: 2755
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9213188290596008
          entropy_coeff: 0.0
          kl: 0.0020675966516137123
          model: {}
          policy_loss: 0.0025990759022533894
          total_loss: 2.6577341556549072
          vf_explained_var: 0.494188010

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,429,7682.51,1716000,127.241,247.073,-26.7117,811.49


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,429,7682.51,1716000,127.241,247.073,-26.7117,811.49


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,429,7682.51,1716000,127.241,247.073,-26.7117,811.49


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1720000
  counters:
    num_agent_steps_sampled: 1720000
    num_agent_steps_trained: 1720000
    num_env_steps_sampled: 1720000
    num_env_steps_trained: 1720000
  custom_metrics: {}
  date: 2022-07-23_23-18-11
  done: false
  episode_len_mean: 811.49
  episode_media: {}
  episode_reward_max: 247.0726710492794
  episode_reward_mean: 125.3655793873032
  episode_reward_min: -26.711698320221984
  episodes_this_iter: 4
  episodes_total: 2759
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9748493432998657
          entropy_coeff: 0.0
          kl: 0.0036084584426134825
          model: {}
          policy_loss: -0.001856166054494679
          total_loss: 2.696810245513916
          vf_explained_var: 0.45011746883

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,430,7702.77,1720000,125.366,247.073,-26.7117,811.49


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,430,7702.77,1720000,125.366,247.073,-26.7117,811.49


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,430,7702.77,1720000,125.366,247.073,-26.7117,811.49


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,430,7702.77,1720000,125.366,247.073,-26.7117,811.49


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1724000
  counters:
    num_agent_steps_sampled: 1724000
    num_agent_steps_trained: 1724000
    num_env_steps_sampled: 1724000
    num_env_steps_trained: 1724000
  custom_metrics: {}
  date: 2022-07-23_23-18-31
  done: false
  episode_len_mean: 819.72
  episode_media: {}
  episode_reward_max: 247.0726710492794
  episode_reward_mean: 127.607864137683
  episode_reward_min: -26.711698320221984
  episodes_this_iter: 4
  episodes_total: 2763
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9516605138778687
          entropy_coeff: 0.0
          kl: 0.0110222939401865
          model: {}
          policy_loss: 0.001498448196798563
          total_loss: 3.3719475269317627
          vf_explained_var: 0.326514065265655

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,431,7722.85,1724000,127.608,247.073,-26.7117,819.72


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,431,7722.85,1724000,127.608,247.073,-26.7117,819.72


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,431,7722.85,1724000,127.608,247.073,-26.7117,819.72


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1728000
  counters:
    num_agent_steps_sampled: 1728000
    num_agent_steps_trained: 1728000
    num_env_steps_sampled: 1728000
    num_env_steps_trained: 1728000
  custom_metrics: {}
  date: 2022-07-23_23-18-50
  done: false
  episode_len_mean: 834.97
  episode_media: {}
  episode_reward_max: 247.0726710492794
  episode_reward_mean: 127.9926873319478
  episode_reward_min: -26.711698320221984
  episodes_this_iter: 4
  episodes_total: 2767
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9263639450073242
          entropy_coeff: 0.0
          kl: 0.0012019479181617498
          model: {}
          policy_loss: -0.0024839257821440697
          total_loss: 3.3527565002441406
          vf_explained_var: 0.392944157

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,432,7742.26,1728000,127.993,247.073,-26.7117,834.97


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,432,7742.26,1728000,127.993,247.073,-26.7117,834.97


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,432,7742.26,1728000,127.993,247.073,-26.7117,834.97


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1732000
  counters:
    num_agent_steps_sampled: 1732000
    num_agent_steps_trained: 1732000
    num_env_steps_sampled: 1732000
    num_env_steps_trained: 1732000
  custom_metrics: {}
  date: 2022-07-23_23-19-10
  done: false
  episode_len_mean: 850.16
  episode_media: {}
  episode_reward_max: 247.0726710492794
  episode_reward_mean: 127.68709876291963
  episode_reward_min: -26.711698320221984
  episodes_this_iter: 4
  episodes_total: 2771
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9039151668548584
          entropy_coeff: 0.0
          kl: 0.0009895235998556018
          model: {}
          policy_loss: 0.000783570867497474
          total_loss: 2.959899425506592
          vf_explained_var: 0.36783054471

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,433,7761.51,1732000,127.687,247.073,-26.7117,850.16


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,433,7761.51,1732000,127.687,247.073,-26.7117,850.16


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,433,7761.51,1732000,127.687,247.073,-26.7117,850.16


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1736000
  counters:
    num_agent_steps_sampled: 1736000
    num_agent_steps_trained: 1736000
    num_env_steps_sampled: 1736000
    num_env_steps_trained: 1736000
  custom_metrics: {}
  date: 2022-07-23_23-19-30
  done: false
  episode_len_mean: 858.33
  episode_media: {}
  episode_reward_max: 247.0726710492794
  episode_reward_mean: 128.46354905476477
  episode_reward_min: -26.711698320221984
  episodes_this_iter: 4
  episodes_total: 2775
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9451304078102112
          entropy_coeff: 0.0
          kl: 0.0009758346132002771
          model: {}
          policy_loss: 0.0009318446391262114
          total_loss: 3.3114006519317627
          vf_explained_var: 0.371887505

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,434,7781.5,1736000,128.464,247.073,-26.7117,858.33


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,434,7781.5,1736000,128.464,247.073,-26.7117,858.33


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,434,7781.5,1736000,128.464,247.073,-26.7117,858.33


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1740000
  counters:
    num_agent_steps_sampled: 1740000
    num_agent_steps_trained: 1740000
    num_env_steps_sampled: 1740000
    num_env_steps_trained: 1740000
  custom_metrics: {}
  date: 2022-07-23_23-19-50
  done: false
  episode_len_mean: 858.33
  episode_media: {}
  episode_reward_max: 247.0726710492794
  episode_reward_mean: 128.21731061317175
  episode_reward_min: -26.711698320221984
  episodes_this_iter: 4
  episodes_total: 2779
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9133991003036499
          entropy_coeff: 0.0
          kl: 0.0025872287806123495
          model: {}
          policy_loss: -0.007661215029656887
          total_loss: 2.7731399536132812
          vf_explained_var: 0.330265760

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,435,7801.79,1740000,128.217,247.073,-26.7117,858.33


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,435,7801.79,1740000,128.217,247.073,-26.7117,858.33


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,435,7801.79,1740000,128.217,247.073,-26.7117,858.33


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,435,7801.79,1740000,128.217,247.073,-26.7117,858.33


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1744000
  counters:
    num_agent_steps_sampled: 1744000
    num_agent_steps_trained: 1744000
    num_env_steps_sampled: 1744000
    num_env_steps_trained: 1744000
  custom_metrics: {}
  date: 2022-07-23_23-20-10
  done: false
  episode_len_mean: 866.51
  episode_media: {}
  episode_reward_max: 247.0726710492794
  episode_reward_mean: 129.41134818014532
  episode_reward_min: -26.711698320221984
  episodes_this_iter: 4
  episodes_total: 2783
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.9145966172218323
          entropy_coeff: 0.0
          kl: 0.0008269555983133614
          model: {}
          policy_loss: 0.004008630756288767
          total_loss: 4.625404357910156
          vf_explained_var: 0.38315379619

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,436,7821.99,1744000,129.411,247.073,-26.7117,866.51


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,436,7821.99,1744000,129.411,247.073,-26.7117,866.51


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,436,7821.99,1744000,129.411,247.073,-26.7117,866.51


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1748000
  counters:
    num_agent_steps_sampled: 1748000
    num_agent_steps_trained: 1748000
    num_env_steps_sampled: 1748000
    num_env_steps_trained: 1748000
  custom_metrics: {}
  date: 2022-07-23_23-20-30
  done: false
  episode_len_mean: 890.07
  episode_media: {}
  episode_reward_max: 247.0726710492794
  episode_reward_mean: 133.464787183353
  episode_reward_min: -26.711698320221984
  episodes_this_iter: 4
  episodes_total: 2787
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.890643298625946
          entropy_coeff: 0.0
          kl: 0.004617931321263313
          model: {}
          policy_loss: -0.0015877584228292108
          total_loss: 3.6076149940490723
          vf_explained_var: 0.413914322853

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,437,7841.91,1748000,133.465,247.073,-26.7117,890.07


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,437,7841.91,1748000,133.465,247.073,-26.7117,890.07


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,437,7841.91,1748000,133.465,247.073,-26.7117,890.07


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,437,7841.91,1748000,133.465,247.073,-26.7117,890.07


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1752000
  counters:
    num_agent_steps_sampled: 1752000
    num_agent_steps_trained: 1752000
    num_env_steps_sampled: 1752000
    num_env_steps_trained: 1752000
  custom_metrics: {}
  date: 2022-07-23_23-20-52
  done: false
  episode_len_mean: 913.05
  episode_media: {}
  episode_reward_max: 247.0726710492794
  episode_reward_mean: 134.80338385786175
  episode_reward_min: -26.711698320221984
  episodes_this_iter: 4
  episodes_total: 2791
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8620878458023071
          entropy_coeff: 0.0
          kl: 0.0015750480815768242
          model: {}
          policy_loss: -0.004060834180563688
          total_loss: 2.73075008392334
          vf_explained_var: 0.42733758687

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,438,7863.38,1752000,134.803,247.073,-26.7117,913.05


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,438,7863.38,1752000,134.803,247.073,-26.7117,913.05


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,438,7863.38,1752000,134.803,247.073,-26.7117,913.05


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1756000
  counters:
    num_agent_steps_sampled: 1756000
    num_agent_steps_trained: 1756000
    num_env_steps_sampled: 1756000
    num_env_steps_trained: 1756000
  custom_metrics: {}
  date: 2022-07-23_23-21-11
  done: false
  episode_len_mean: 911.88
  episode_media: {}
  episode_reward_max: 247.0726710492794
  episode_reward_mean: 131.90531399329805
  episode_reward_min: -26.711698320221984
  episodes_this_iter: 5
  episodes_total: 2796
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.863967776298523
          entropy_coeff: 0.0
          kl: 0.0004830614780075848
          model: {}
          policy_loss: -0.004004987422376871
          total_loss: 3.3493831157684326
          vf_explained_var: 0.3923235535

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,439,7882.71,1756000,131.905,247.073,-26.7117,911.88


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,439,7882.71,1756000,131.905,247.073,-26.7117,911.88


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,439,7882.71,1756000,131.905,247.073,-26.7117,911.88


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1760000
  counters:
    num_agent_steps_sampled: 1760000
    num_agent_steps_trained: 1760000
    num_env_steps_sampled: 1760000
    num_env_steps_trained: 1760000
  custom_metrics: {}
  date: 2022-07-23_23-21-30
  done: false
  episode_len_mean: 928.79
  episode_media: {}
  episode_reward_max: 247.0726710492794
  episode_reward_mean: 133.61457071240002
  episode_reward_min: -25.077141757070393
  episodes_this_iter: 5
  episodes_total: 2801
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8572452068328857
          entropy_coeff: 0.0
          kl: 0.0019295872189104557
          model: {}
          policy_loss: -0.003425143426284194
          total_loss: 3.5374605655670166
          vf_explained_var: 0.373194634

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,440,7901.71,1760000,133.615,247.073,-25.0771,928.79


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,440,7901.71,1760000,133.615,247.073,-25.0771,928.79


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,440,7901.71,1760000,133.615,247.073,-25.0771,928.79


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1764000
  counters:
    num_agent_steps_sampled: 1764000
    num_agent_steps_trained: 1764000
    num_env_steps_sampled: 1764000
    num_env_steps_trained: 1764000
  custom_metrics: {}
  date: 2022-07-23_23-21-50
  done: false
  episode_len_mean: 941.99
  episode_media: {}
  episode_reward_max: 247.0726710492794
  episode_reward_mean: 134.17375440274557
  episode_reward_min: -25.077141757070393
  episodes_this_iter: 4
  episodes_total: 2805
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8529117107391357
          entropy_coeff: 0.0
          kl: 0.007451441138982773
          model: {}
          policy_loss: -0.010330895893275738
          total_loss: 2.745300531387329
          vf_explained_var: 0.35169580578

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,441,7921.41,1764000,134.174,247.073,-25.0771,941.99


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,441,7921.41,1764000,134.174,247.073,-25.0771,941.99


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,441,7921.41,1764000,134.174,247.073,-25.0771,941.99


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1768000
  counters:
    num_agent_steps_sampled: 1768000
    num_agent_steps_trained: 1768000
    num_env_steps_sampled: 1768000
    num_env_steps_trained: 1768000
  custom_metrics: {}
  date: 2022-07-23_23-22-10
  done: false
  episode_len_mean: 949.73
  episode_media: {}
  episode_reward_max: 247.0726710492794
  episode_reward_mean: 135.83464121734815
  episode_reward_min: -25.077141757070393
  episodes_this_iter: 4
  episodes_total: 2809
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8882099986076355
          entropy_coeff: 0.0
          kl: 0.003242266131564975
          model: {}
          policy_loss: -0.023748258128762245
          total_loss: 2.7887449264526367
          vf_explained_var: 0.3756463229

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,442,7941.06,1768000,135.835,247.073,-25.0771,949.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,442,7941.06,1768000,135.835,247.073,-25.0771,949.73


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,442,7941.06,1768000,135.835,247.073,-25.0771,949.73


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1772000
  counters:
    num_agent_steps_sampled: 1772000
    num_agent_steps_trained: 1772000
    num_env_steps_sampled: 1772000
    num_env_steps_trained: 1772000
  custom_metrics: {}
  date: 2022-07-23_23-22-28
  done: false
  episode_len_mean: 943.06
  episode_media: {}
  episode_reward_max: 247.0726710492794
  episode_reward_mean: 134.7258839163451
  episode_reward_min: -25.077141757070393
  episodes_this_iter: 5
  episodes_total: 2814
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8802492022514343
          entropy_coeff: 0.0
          kl: 0.002629289636388421
          model: {}
          policy_loss: -0.0005254753050394356
          total_loss: 3.7919938564300537
          vf_explained_var: 0.3763737082

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,443,7959.09,1772000,134.726,247.073,-25.0771,943.06


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,443,7959.09,1772000,134.726,247.073,-25.0771,943.06


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,443,7959.09,1772000,134.726,247.073,-25.0771,943.06


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1776000
  counters:
    num_agent_steps_sampled: 1776000
    num_agent_steps_trained: 1776000
    num_env_steps_sampled: 1776000
    num_env_steps_trained: 1776000
  custom_metrics: {}
  date: 2022-07-23_23-22-46
  done: false
  episode_len_mean: 943.06
  episode_media: {}
  episode_reward_max: 247.0726710492794
  episode_reward_mean: 134.27940930658383
  episode_reward_min: -25.077141757070393
  episodes_this_iter: 4
  episodes_total: 2818
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8618001341819763
          entropy_coeff: 0.0
          kl: 0.006098844110965729
          model: {}
          policy_loss: -0.005577750038355589
          total_loss: 2.6939432621002197
          vf_explained_var: 0.2749433517

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,444,7976.91,1776000,134.279,247.073,-25.0771,943.06


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,444,7976.91,1776000,134.279,247.073,-25.0771,943.06


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,444,7976.91,1776000,134.279,247.073,-25.0771,943.06


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1780000
  counters:
    num_agent_steps_sampled: 1780000
    num_agent_steps_trained: 1780000
    num_env_steps_sampled: 1780000
    num_env_steps_trained: 1780000
  custom_metrics: {}
  date: 2022-07-23_23-23-05
  done: false
  episode_len_mean: 950.8
  episode_media: {}
  episode_reward_max: 247.0726710492794
  episode_reward_mean: 134.23884735755726
  episode_reward_min: -25.077141757070393
  episodes_this_iter: 4
  episodes_total: 2822
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8806745409965515
          entropy_coeff: 0.0
          kl: 0.003234204137697816
          model: {}
          policy_loss: -0.00307458289898932
          total_loss: 3.2302932739257812
          vf_explained_var: 0.232576340436

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,445,7995.96,1780000,134.239,247.073,-25.0771,950.8


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,445,7995.96,1780000,134.239,247.073,-25.0771,950.8


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,445,7995.96,1780000,134.239,247.073,-25.0771,950.8


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1784000
  counters:
    num_agent_steps_sampled: 1784000
    num_agent_steps_trained: 1784000
    num_env_steps_sampled: 1784000
    num_env_steps_trained: 1784000
  custom_metrics: {}
  date: 2022-07-23_23-23-24
  done: false
  episode_len_mean: 956.8
  episode_media: {}
  episode_reward_max: 188.54986482092957
  episode_reward_mean: 133.03994969738687
  episode_reward_min: -25.077141757070393
  episodes_this_iter: 4
  episodes_total: 2826
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8430312275886536
          entropy_coeff: 0.0
          kl: 0.0068434178829193115
          model: {}
          policy_loss: -0.005408603698015213
          total_loss: 2.9534316062927246
          vf_explained_var: 0.199693992

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,446,8014.82,1784000,133.04,188.55,-25.0771,956.8


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,446,8014.82,1784000,133.04,188.55,-25.0771,956.8


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,446,8014.82,1784000,133.04,188.55,-25.0771,956.8


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1788000
  counters:
    num_agent_steps_sampled: 1788000
    num_agent_steps_trained: 1788000
    num_env_steps_sampled: 1788000
    num_env_steps_trained: 1788000
  custom_metrics: {}
  date: 2022-07-23_23-23-43
  done: false
  episode_len_mean: 956.8
  episode_media: {}
  episode_reward_max: 188.54986482092957
  episode_reward_mean: 133.60623988280358
  episode_reward_min: -25.077141757070393
  episodes_this_iter: 4
  episodes_total: 2830
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8128837943077087
          entropy_coeff: 0.0
          kl: 0.005036926828324795
          model: {}
          policy_loss: -0.005942551419138908
          total_loss: 2.884213924407959
          vf_explained_var: 0.26968783140

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,447,8034.1,1788000,133.606,188.55,-25.0771,956.8


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,447,8034.1,1788000,133.606,188.55,-25.0771,956.8


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,447,8034.1,1788000,133.606,188.55,-25.0771,956.8


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1792000
  counters:
    num_agent_steps_sampled: 1792000
    num_agent_steps_trained: 1792000
    num_env_steps_sampled: 1792000
    num_env_steps_trained: 1792000
  custom_metrics: {}
  date: 2022-07-23_23-24-02
  done: false
  episode_len_mean: 964.66
  episode_media: {}
  episode_reward_max: 188.54986482092957
  episode_reward_mean: 134.49817136481207
  episode_reward_min: -25.077141757070393
  episodes_this_iter: 4
  episodes_total: 2834
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8448300957679749
          entropy_coeff: 0.0
          kl: 0.0005872775218449533
          model: {}
          policy_loss: -0.0028046858496963978
          total_loss: 2.9545164108276367
          vf_explained_var: 0.2541131

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,448,8053.02,1792000,134.498,188.55,-25.0771,964.66


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,448,8053.02,1792000,134.498,188.55,-25.0771,964.66


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,448,8053.02,1792000,134.498,188.55,-25.0771,964.66


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1796000
  counters:
    num_agent_steps_sampled: 1796000
    num_agent_steps_trained: 1796000
    num_env_steps_sampled: 1796000
    num_env_steps_trained: 1796000
  custom_metrics: {}
  date: 2022-07-23_23-24-21
  done: false
  episode_len_mean: 963.69
  episode_media: {}
  episode_reward_max: 257.45636829625533
  episode_reward_mean: 136.0916120490731
  episode_reward_min: -25.077141757070393
  episodes_this_iter: 5
  episodes_total: 2839
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7031763195991516
          entropy_coeff: 0.0
          kl: 0.008814395405352116
          model: {}
          policy_loss: -0.009061740711331367
          total_loss: 3.486156940460205
          vf_explained_var: 0.24231167137

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,449,8071.36,1796000,136.092,257.456,-25.0771,963.69


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,449,8071.36,1796000,136.092,257.456,-25.0771,963.69


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,449,8071.36,1796000,136.092,257.456,-25.0771,963.69


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1800000
  counters:
    num_agent_steps_sampled: 1800000
    num_agent_steps_trained: 1800000
    num_env_steps_sampled: 1800000
    num_env_steps_trained: 1800000
  custom_metrics: {}
  date: 2022-07-23_23-24-39
  done: false
  episode_len_mean: 944.65
  episode_media: {}
  episode_reward_max: 267.7440812990504
  episode_reward_mean: 140.1198565107535
  episode_reward_min: -25.077141757070393
  episodes_this_iter: 6
  episodes_total: 2845
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7023137211799622
          entropy_coeff: 0.0
          kl: 0.00404954282566905
          model: {}
          policy_loss: -0.007672926876693964
          total_loss: 5.182424068450928
          vf_explained_var: 0.1529325097799

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,450,8090.12,1800000,140.12,267.744,-25.0771,944.65


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,450,8090.12,1800000,140.12,267.744,-25.0771,944.65


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,450,8090.12,1800000,140.12,267.744,-25.0771,944.65


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1804000
  counters:
    num_agent_steps_sampled: 1804000
    num_agent_steps_trained: 1804000
    num_env_steps_sampled: 1804000
    num_env_steps_trained: 1804000
  custom_metrics: {}
  date: 2022-07-23_23-24-58
  done: false
  episode_len_mean: 920.02
  episode_media: {}
  episode_reward_max: 267.7440812990504
  episode_reward_mean: 145.47449543422562
  episode_reward_min: -25.077141757070393
  episodes_this_iter: 6
  episodes_total: 2851
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7619760036468506
          entropy_coeff: 0.0
          kl: 0.001233316375873983
          model: {}
          policy_loss: -0.00018677019397728145
          total_loss: 7.0142693519592285
          vf_explained_var: 0.10769014

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,451,8109.14,1804000,145.474,267.744,-25.0771,920.02


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,451,8109.14,1804000,145.474,267.744,-25.0771,920.02


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,451,8109.14,1804000,145.474,267.744,-25.0771,920.02


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1808000
  counters:
    num_agent_steps_sampled: 1808000
    num_agent_steps_trained: 1808000
    num_env_steps_sampled: 1808000
    num_env_steps_trained: 1808000
  custom_metrics: {}
  date: 2022-07-23_23-25-17
  done: false
  episode_len_mean: 884.56
  episode_media: {}
  episode_reward_max: 273.3967418690968
  episode_reward_mean: 153.75937046227043
  episode_reward_min: -25.077141757070393
  episodes_this_iter: 8
  episodes_total: 2859
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8006417155265808
          entropy_coeff: 0.0
          kl: 0.010273159481585026
          model: {}
          policy_loss: -0.010770709253847599
          total_loss: 6.896061420440674
          vf_explained_var: 0.04408649727

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,452,8127.21,1808000,153.759,273.397,-25.0771,884.56


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,452,8127.21,1808000,153.759,273.397,-25.0771,884.56


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,452,8127.21,1808000,153.759,273.397,-25.0771,884.56


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1812000
  counters:
    num_agent_steps_sampled: 1812000
    num_agent_steps_trained: 1812000
    num_env_steps_sampled: 1812000
    num_env_steps_trained: 1812000
  custom_metrics: {}
  date: 2022-07-23_23-25-35
  done: false
  episode_len_mean: 823.17
  episode_media: {}
  episode_reward_max: 286.7845485003025
  episode_reward_mean: 164.35120144222617
  episode_reward_min: -25.077141757070393
  episodes_this_iter: 10
  episodes_total: 2869
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.8228488564491272
          entropy_coeff: 0.0
          kl: 0.006379168946295977
          model: {}
          policy_loss: -0.007803493645042181
          total_loss: 8.123733520507812
          vf_explained_var: -0.132374227

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,453,8145.17,1812000,164.351,286.785,-25.0771,823.17


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,453,8145.17,1812000,164.351,286.785,-25.0771,823.17


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,453,8145.17,1812000,164.351,286.785,-25.0771,823.17


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1816000
  counters:
    num_agent_steps_sampled: 1816000
    num_agent_steps_trained: 1816000
    num_env_steps_sampled: 1816000
    num_env_steps_trained: 1816000
  custom_metrics: {}
  date: 2022-07-23_23-25-53
  done: false
  episode_len_mean: 766.02
  episode_media: {}
  episode_reward_max: 301.51895047288417
  episode_reward_mean: 173.7323845814114
  episode_reward_min: -37.63887999023912
  episodes_this_iter: 10
  episodes_total: 2879
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.6956703066825867
          entropy_coeff: 0.0
          kl: 0.00110441236756742
          model: {}
          policy_loss: -0.004980310797691345
          total_loss: 7.861454486846924
          vf_explained_var: -0.04569614306

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,454,8163.21,1816000,173.732,301.519,-37.6389,766.02


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,454,8163.21,1816000,173.732,301.519,-37.6389,766.02


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,454,8163.21,1816000,173.732,301.519,-37.6389,766.02


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1820000
  counters:
    num_agent_steps_sampled: 1820000
    num_agent_steps_trained: 1820000
    num_env_steps_sampled: 1820000
    num_env_steps_trained: 1820000
  custom_metrics: {}
  date: 2022-07-23_23-26-11
  done: false
  episode_len_mean: 738.87
  episode_media: {}
  episode_reward_max: 301.51895047288417
  episode_reward_mean: 177.47222547976787
  episode_reward_min: -37.63887999023912
  episodes_this_iter: 6
  episodes_total: 2885
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.6572701930999756
          entropy_coeff: 0.0
          kl: 0.005434381775557995
          model: {}
          policy_loss: -0.005352029576897621
          total_loss: 4.600157737731934
          vf_explained_var: 0.28325095772

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,455,8181.79,1820000,177.472,301.519,-37.6389,738.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,455,8181.79,1820000,177.472,301.519,-37.6389,738.87


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,455,8181.79,1820000,177.472,301.519,-37.6389,738.87


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1824000
  counters:
    num_agent_steps_sampled: 1824000
    num_agent_steps_trained: 1824000
    num_env_steps_sampled: 1824000
    num_env_steps_trained: 1824000
  custom_metrics: {}
  date: 2022-07-23_23-26-30
  done: false
  episode_len_mean: 704.92
  episode_media: {}
  episode_reward_max: 301.51895047288417
  episode_reward_mean: 184.2395818777096
  episode_reward_min: -37.63887999023912
  episodes_this_iter: 8
  episodes_total: 2893
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7543215751647949
          entropy_coeff: 0.0
          kl: 0.0019271260825917125
          model: {}
          policy_loss: -0.000996106187812984
          total_loss: 6.4182562828063965
          vf_explained_var: 0.1258612424

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,456,8200.33,1824000,184.24,301.519,-37.6389,704.92


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,456,8200.33,1824000,184.24,301.519,-37.6389,704.92


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,456,8200.33,1824000,184.24,301.519,-37.6389,704.92


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1828000
  counters:
    num_agent_steps_sampled: 1828000
    num_agent_steps_trained: 1828000
    num_env_steps_sampled: 1828000
    num_env_steps_trained: 1828000
  custom_metrics: {}
  date: 2022-07-23_23-26-48
  done: false
  episode_len_mean: 671.71
  episode_media: {}
  episode_reward_max: 301.51895047288417
  episode_reward_mean: 195.50069775106786
  episode_reward_min: -37.63887999023912
  episodes_this_iter: 9
  episodes_total: 2902
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.747657299041748
          entropy_coeff: 0.0
          kl: 0.0009957834845408797
          model: {}
          policy_loss: -0.005606628954410553
          total_loss: 6.667524337768555
          vf_explained_var: 0.00297060422

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,457,8218.37,1828000,195.501,301.519,-37.6389,671.71


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,457,8218.37,1828000,195.501,301.519,-37.6389,671.71


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,RUNNING,127.0.0.1:21104,457,8218.37,1828000,195.501,301.519,-37.6389,671.71


Result for PPO_LunarLander-v2_e6219_00000:
  agent_timesteps_total: 1832000
  counters:
    num_agent_steps_sampled: 1832000
    num_agent_steps_trained: 1832000
    num_env_steps_sampled: 1832000
    num_env_steps_trained: 1832000
  custom_metrics: {}
  date: 2022-07-23_23-27-06
  done: true
  episode_len_mean: 618.77
  episode_media: {}
  episode_reward_max: 301.51895047288417
  episode_reward_mean: 201.26650022237737
  episode_reward_min: -37.63887999023912
  episodes_this_iter: 8
  episodes_total: 2910
  experiment_id: 1b793ba02dae47f8a169e132e2056f77
  hostname: LAPTOP-MUL4L8MS
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.0
          cur_lr: 4.999999873689376e-05
          entropy: 0.7424366474151611
          entropy_coeff: 0.0
          kl: 0.006851014215499163
          model: {}
          policy_loss: -0.006885472685098648
          total_loss: 5.920274257659912
          vf_explained_var: 0.059905447065

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_LunarLander-v2_e6219_00000,TERMINATED,127.0.0.1:21104,458,8236.5,1832000,201.267,301.519,-37.6389,618.77


2022-07-23 23:27:07,108	INFO tune.py:747 -- Total run time: 8291.37 seconds (8290.92 seconds for the tuning loop).


In [11]:
# restore a trainer from the last checkpoint
trial = analysis.get_best_logdir("episode_reward_mean", "max")
checkpoint = analysis.get_best_checkpoint(
  trial,
  "training_iteration",
  "max",
)
trainer = PPOTrainer(config=config)
trainer.restore(checkpoint)

2022-07-23 23:29:38,818	INFO trainer.py:2332 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
2022-07-23 23:29:38,819	INFO ppo.py:414 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
2022-07-23 23:29:38,820	INFO trainer.py:903 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
2022-07-23 23:29:54,201	INFO trainable.py:159 -- Trainable.setup took 15.385 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2022-07-23 23:29:54,280	INFO trainable.py:588 -- Restored on 127.0.0.1 from checkpoint: C:\Users\brieg\ray_results\PPO\PPO_LunarLander-v2_e6219_0

## Evaluate the Agent :

In [19]:
video_name = "after_training"
video_path = check_video_folder_sanity(path, video_name)

after_video = VideoRecorder(env, video_path + ".mp4", enabled=video_name is not None)
observation = env.reset()
done = False
while not done:
    env.render()
    after_video.capture_frame()
    action = trainer.compute_single_action(observation)
    observation, reward, done, info = env.step(action)
after_video.close()
env.close()

In [20]:
Video(video_path + ".mp4")